constantsToAdd = new HashSet<>();
+ for (Field runtimeField : runtimeFeature.getDeclaredFields()) {
+ if (runtimeField.isEnumConstant()) {
+ constantsToAdd.add(runtimeField.getName());
+ }
+ }
+ var dummy = new StringWriter();
+ var compiler = ToolProvider.getSystemJavaCompiler();
+ var source = Path.of(args[0]);
+ try (var fm = compiler.getStandardFileManager(null, null, null)) {
+ JavacTask task =
+ (JavacTask) compiler.getTask(dummy, null, null, null, null, fm.getJavaFileObjects(source));
+ task.analyze();
+ var sourceFeature = task.getElements()
+ .getTypeElement("jdk.internal.javac.PreviewFeature.Feature");
+ int insertPosition = -1;
+ for (var el : sourceFeature.getEnclosedElements()) {
+ if (el.getKind() == ElementKind.ENUM_CONSTANT) {
+ constantsToAdd.remove(el.getSimpleName().toString());
+ if (insertPosition == (-1)) {
+ var trees = Trees.instance(task);
+ var elPath = trees.getPath(el);
+ insertPosition = (int) trees.getSourcePositions()
+ .getStartPosition(elPath.getCompilationUnit(),
+ elPath.getLeaf());
+ }
+ }
+ }
+ var target = Path.of(args[1]);
+ Files.createDirectories(target.getParent());
+ if (constantsToAdd.isEmpty()) {
+ Files.copy(source, target, StandardCopyOption.REPLACE_EXISTING);
+ } else {
+ String sourceCode = Files.readString(source);
+ try (var out = Files.newBufferedWriter(target)) {
+ out.write(sourceCode, 0, insertPosition);
+ out.write(constantsToAdd.stream()
+ .collect(Collectors.joining(", ",
+ "/*compatibility constants:*/ ",
+ ",\n")));
+ out.write(sourceCode, insertPosition, sourceCode.length() - insertPosition);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/make/modules/java.base/Copy.gmk b/make/modules/java.base/Copy.gmk
index b8c1f2c05fa..43b9db651e0 100644
--- a/make/modules/java.base/Copy.gmk
+++ b/make/modules/java.base/Copy.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -172,6 +172,10 @@ ifeq ($(USE_EXTERNAL_LIBZ), true)
LEGAL_EXCLUDES += zlib.md
endif
+ifneq ($(TOOLCHAIN_TYPE), gcc)
+ LEGAL_EXCLUDES += gcc.md
+endif
+
$(eval $(call SetupCopyLegalFiles, COPY_LEGAL, \
EXCLUDES := $(LEGAL_EXCLUDES), \
))
diff --git a/make/modules/java.base/Launcher.gmk b/make/modules/java.base/Launcher.gmk
index 3a3920acb12..bfae0925c07 100644
--- a/make/modules/java.base/Launcher.gmk
+++ b/make/modules/java.base/Launcher.gmk
@@ -95,7 +95,8 @@ ifeq ($(call isTargetOsType, unix), true)
CFLAGS := $(VERSION_CFLAGS), \
EXTRA_HEADER_DIRS := libjava, \
EXTRA_OBJECT_FILES := \
- $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libjava/childproc$(OBJ_SUFFIX), \
+ $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libjava/childproc$(OBJ_SUFFIX) \
+ $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libjava/childproc_errorcodes$(OBJ_SUFFIX), \
LD_SET_ORIGIN := false, \
OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE), \
))
diff --git a/make/modules/java.desktop/lib/AwtLibraries.gmk b/make/modules/java.desktop/lib/AwtLibraries.gmk
index 463e09e12dc..887dfab01df 100644
--- a/make/modules/java.desktop/lib/AwtLibraries.gmk
+++ b/make/modules/java.desktop/lib/AwtLibraries.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -88,6 +88,10 @@ LIBAWT_EXTRA_HEADER_DIRS := \
LIBAWT_CFLAGS := -D__MEDIALIB_OLD_NAMES -D__USE_J2D_NAMES -DMLIB_NO_LIBSUNMATH
+ifeq ($(ENABLE_HEADLESS_ONLY), true)
+ LIBAWT_CFLAGS += -DHEADLESS
+endif
+
ifeq ($(call isTargetOs, windows), true)
LIBAWT_CFLAGS += -EHsc -DUNICODE -D_UNICODE -DMLIB_OS64BIT
LIBAWT_RCFLAGS ?= -I$(TOPDIR)/src/java.base/windows/native/launcher/icons
@@ -95,14 +99,16 @@ ifeq ($(call isTargetOs, windows), true)
$(TOPDIR)/src/$(MODULE)/windows/native/libawt/windows/awt.rc
endif
-# This is the object file to provide the dladdr API, which is not
-# part of AIX. It occurs several times in the jdk code base.
-# Do not include it. When statically linking the java
-# launcher with all JDK and VM static libraries, we use the
-# --whole-archive linker option. The duplicate objects in different
-# static libraries cause linking errors due to duplicate symbols.
ifeq ($(call isTargetOs, aix), true)
+ # This is the object file to provide the dladdr API, which is not
+ # part of AIX. It occurs several times in the jdk code base.
+ # Do not include it. When statically linking the java
+ # launcher with all JDK and VM static libraries, we use the
+ # --whole-archive linker option. The duplicate objects in different
+ # static libraries cause linking errors due to duplicate symbols.
LIBAWT_STATIC_EXCLUDE_OBJS := porting_aix.o
+
+ LIBAWT_CFLAGS += -I$(TOPDIR)/src/java.base/aix/native/include
endif
# -fgcse-after-reload improves performance of MaskFill in Java2D by 20% for
@@ -167,11 +173,18 @@ ifeq ($(call isTargetOs, windows macosx), false)
$(TOPDIR)/src/$(MODULE)/$(OPENJDK_TARGET_OS_TYPE)/native/common/awt \
#
+ LIBAWT_HEADLESS_EXCLUDE_FILES := \
+ GLXGraphicsConfig.c \
+ GLXSurfaceData.c \
+ X11PMBlitLoops.c \
+ X11Renderer.c \
+ X11SurfaceData.c \
+ #
+
LIBAWT_HEADLESS_EXTRA_HEADER_DIRS := \
$(LIBAWT_DEFAULT_HEADER_DIRS) \
common/awt/debug \
common/font \
- common/java2d/opengl \
java.base:libjvm \
#
@@ -191,7 +204,8 @@ ifeq ($(call isTargetOs, windows macosx), false)
$(eval $(call SetupJdkLibrary, BUILD_LIBAWT_HEADLESS, \
NAME := awt_headless, \
EXTRA_SRC := $(LIBAWT_HEADLESS_EXTRA_SRC), \
- EXCLUDES := medialib, \
+ EXCLUDES := medialib opengl, \
+ EXCLUDE_FILES := $(LIBAWT_HEADLESS_EXCLUDE_FILES), \
ONLY_EXPORTED := $(LIBAWT_HEADLESS_ONLY_EXPORTED), \
OPTIMIZATION := LOW, \
CFLAGS := -DHEADLESS=true $(CUPS_CFLAGS) $(FONTCONFIG_CFLAGS) \
@@ -411,6 +425,9 @@ endif
ifeq ($(call isTargetOs, linux)+$(ENABLE_HEADLESS_ONLY), true+true)
LIBJAWT_CFLAGS += -DHEADLESS
endif
+ifeq ($(call isTargetOs, aix)+$(ENABLE_HEADLESS_ONLY), true+true)
+ LIBJAWT_CFLAGS += -DHEADLESS
+endif
ifeq ($(call isTargetOs, windows)+$(call isTargetCpu, x86), true+true)
LIBJAWT_LIBS_windows := kernel32.lib
diff --git a/make/modules/java.desktop/lib/ClientLibraries.gmk b/make/modules/java.desktop/lib/ClientLibraries.gmk
index b76cb8dc4e3..3e37fe79643 100644
--- a/make/modules/java.desktop/lib/ClientLibraries.gmk
+++ b/make/modules/java.desktop/lib/ClientLibraries.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -257,6 +257,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
DISABLED_WARNINGS_microsoft_dgif_lib.c := 4018 4267, \
DISABLED_WARNINGS_microsoft_splashscreen_impl.c := 4018 4267 4244, \
DISABLED_WARNINGS_microsoft_splashscreen_png.c := 4267, \
+ DISABLED_WARNINGS_microsoft_pngread.c := 4146, \
DISABLED_WARNINGS_microsoft_splashscreen_sys.c := 4267 4244, \
LDFLAGS := $(ICONV_LDFLAGS), \
LDFLAGS_windows := -delayload:user32.dll, \
@@ -338,11 +339,8 @@ else
# noexcept-type required for GCC 7 builds. Not required for GCC 8+.
# expansion-to-defined required for GCC 9 builds. Not required for GCC 10+.
# maybe-uninitialized required for GCC 8 builds. Not required for GCC 9+.
- # calloc-transposed-args required for GCC 14 builds. (fixed upstream in
- # Harfbuzz 032c931e1c0cfb20f18e5acb8ba005775242bd92)
HARFBUZZ_DISABLED_WARNINGS_CXX_gcc := class-memaccess noexcept-type \
- expansion-to-defined dangling-reference maybe-uninitialized \
- calloc-transposed-args
+ expansion-to-defined dangling-reference maybe-uninitialized
HARFBUZZ_DISABLED_WARNINGS_clang := missing-field-initializers \
range-loop-analysis unused-variable
HARFBUZZ_DISABLED_WARNINGS_microsoft := 4267 4244
diff --git a/make/modules/jdk.hotspot.agent/Lib.gmk b/make/modules/jdk.hotspot.agent/Lib.gmk
index ed8de631dc3..da02e0dab39 100644
--- a/make/modules/jdk.hotspot.agent/Lib.gmk
+++ b/make/modules/jdk.hotspot.agent/Lib.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -55,6 +55,12 @@ else
LIBSAPROC_LINK_TYPE := C
endif
+# DWARF-related sources are included on supported platforms only.
+LIBSAPROC_EXCLUDE_FILES :=
+ifneq ($(call And, $(call isTargetOs, linux) $(call isTargetCpu, x86_64 aarch64)), true)
+ LIBSAPROC_EXCLUDE_FILES := DwarfParser.cpp dwarf.cpp
+endif
+
$(eval $(call SetupJdkLibrary, BUILD_LIBSAPROC, \
NAME := saproc, \
LINK_TYPE := $(LIBSAPROC_LINK_TYPE), \
@@ -70,6 +76,7 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBSAPROC, \
CFLAGS := $(LIBSAPROC_CFLAGS), \
CXXFLAGS := $(LIBSAPROC_CFLAGS) $(LIBSAPROC_CXXFLAGS), \
EXTRA_SRC := $(LIBSAPROC_EXTRA_SRC), \
+ EXCLUDE_FILES := $(LIBSAPROC_EXCLUDE_FILES), \
JDK_LIBS := java.base:libjava, \
LIBS_linux := $(LIBDL), \
LIBS_macosx := \
diff --git a/make/modules/jdk.jpackage/Java.gmk b/make/modules/jdk.jpackage/Java.gmk
index da66fc14009..1fd4d527217 100644
--- a/make/modules/jdk.jpackage/Java.gmk
+++ b/make/modules/jdk.jpackage/Java.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,7 @@ DISABLED_WARNINGS_java += dangling-doc-comments
COPY += .gif .png .txt .spec .script .prerm .preinst \
.postrm .postinst .list .sh .desktop .copyright .control .plist .template \
- .icns .scpt .wxs .wxl .wxi .wxf .ico .bmp .tiff .service .xsl
+ .icns .scpt .wxs .wxl .wxi .wxf .ico .bmp .tiff .service .xsl .js
CLEAN += .properties
diff --git a/make/modules/jdk.jpackage/Lib.gmk b/make/modules/jdk.jpackage/Lib.gmk
index 704436bbde6..86b11bdafee 100644
--- a/make/modules/jdk.jpackage/Lib.gmk
+++ b/make/modules/jdk.jpackage/Lib.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,7 @@ $(eval $(call SetupJdkExecutable, BUILD_JPACKAGEAPPLAUNCHER, \
SRC := applauncher, \
EXTRA_SRC := common, \
INCLUDE_FILES := $(JPACKAGEAPPLAUNCHER_INCLUDE_FILES), \
- OPTIMIZATION := LOW, \
+ OPTIMIZATION := SIZE, \
DISABLED_WARNINGS_clang_JvmLauncherLib.c := format-nonliteral, \
DISABLED_WARNINGS_clang_LinuxPackage.c := format-nonliteral, \
DISABLED_WARNINGS_clang_Log.cpp := unused-const-variable, \
@@ -91,7 +91,7 @@ ifeq ($(call isTargetOs, linux), true)
common, \
EXCLUDE_FILES := LinuxLauncher.c LinuxPackage.c, \
LINK_TYPE := C++, \
- OPTIMIZATION := LOW, \
+ OPTIMIZATION := SIZE, \
DISABLED_WARNINGS_gcc_Log.cpp := unused-const-variable, \
DISABLED_WARNINGS_clang_JvmLauncherLib.c := format-nonliteral, \
DISABLED_WARNINGS_clang_tstrings.cpp := format-nonliteral, \
diff --git a/make/scripts/fixpath.sh b/make/scripts/fixpath.sh
index 6a524df4c68..78690f1f2cc 100644
--- a/make/scripts/fixpath.sh
+++ b/make/scripts/fixpath.sh
@@ -88,7 +88,10 @@ function setup() {
fi
if [[ -z ${CMD+x} ]]; then
- CMD="$DRIVEPREFIX/c/windows/system32/cmd.exe"
+ CMD="$(type -p cmd.exe 2>/dev/null)"
+ if [[ -z "$CMD" ]]; then
+ CMD="$DRIVEPREFIX/c/windows/system32/cmd.exe"
+ fi
fi
if [[ -z ${WINTEMP+x} ]]; then
diff --git a/make/test/JtregNativeJdk.gmk b/make/test/JtregNativeJdk.gmk
index 0482011f561..6774e708f99 100644
--- a/make/test/JtregNativeJdk.gmk
+++ b/make/test/JtregNativeJdk.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -63,7 +63,8 @@ ifeq ($(call isTargetOs, windows), true)
BUILD_JDK_JTREG_EXCLUDE += libDirectIO.c libInheritedChannel.c \
libExplicitAttach.c libImplicitAttach.c \
exelauncher.c libFDLeaker.c exeFDLeakTester.c \
- libChangeSignalDisposition.c exePrintSignalDisposition.c
+ libChangeSignalDisposition.c exePrintSignalDisposition.c \
+ libConcNativeFork.c libPipesCloseOnExec.c
BUILD_JDK_JTREG_EXECUTABLES_LIBS_exeNullCallerTest := $(LIBCXX)
BUILD_JDK_JTREG_EXECUTABLES_LIBS_exerevokeall := advapi32.lib
@@ -77,6 +78,9 @@ else
BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libLinkerInvokerUnnamed := -pthread
BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libLinkerInvokerModule := -pthread
BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libLoaderLookupInvoker := -pthread
+ BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libConcNativeFork := -pthread
+ BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libPipesCloseOnExec := -pthread
+ BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libLoaderLookupInvoker := -pthread
BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libExplicitAttach := -pthread
BUILD_JDK_JTREG_LIBRARIES_LDFLAGS_libImplicitAttach := -pthread
diff --git a/src/demo/share/jfc/TableExample/OldJTable.java b/src/demo/share/jfc/TableExample/OldJTable.java
deleted file mode 100644
index 8c77978fe8a..00000000000
--- a/src/demo/share/jfc/TableExample/OldJTable.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of Oracle nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * This source code is provided to illustrate the usage of a given feature
- * or technique and has been deliberately simplified. Additional steps
- * required for a production-quality application, such as security checks,
- * input validation and proper error handling, might not be present in
- * this sample code.
- */
-
-
-
-import java.util.EventObject;
-import java.util.List;
-import javax.swing.JTable;
-import javax.swing.table.DefaultTableModel;
-import javax.swing.table.TableCellEditor;
-import javax.swing.table.TableCellRenderer;
-import javax.swing.table.TableColumn;
-
-
-/**
- * The OldJTable is an unsupported class containing some methods that were
- * deleted from the JTable between releases 0.6 and 0.7
- */
-@SuppressWarnings("serial")
-public class OldJTable extends JTable
-{
- /*
- * A new convenience method returning the index of the column in the
- * co-ordinate space of the view.
- */
- public int getColumnIndex(Object identifier) {
- return getColumnModel().getColumnIndex(identifier);
- }
-
-//
-// Methods deleted from the JTable because they only work with the
-// DefaultTableModel.
-//
-
- public TableColumn addColumn(Object columnIdentifier, int width) {
- return addColumn(columnIdentifier, width, null, null, null);
- }
-
- public TableColumn addColumn(Object columnIdentifier, List> columnData) {
- return addColumn(columnIdentifier, -1, null, null, columnData);
- }
-
- // Override the new JTable implementation - it will not add a column to the
- // DefaultTableModel.
- public TableColumn addColumn(Object columnIdentifier, int width,
- TableCellRenderer renderer,
- TableCellEditor editor) {
- return addColumn(columnIdentifier, width, renderer, editor, null);
- }
-
- public TableColumn addColumn(Object columnIdentifier, int width,
- TableCellRenderer renderer,
- TableCellEditor editor, List> columnData) {
- checkDefaultTableModel();
-
- // Set up the model side first
- DefaultTableModel m = (DefaultTableModel)getModel();
- m.addColumn(columnIdentifier, columnData.toArray());
-
- // The column will have been added to the end, so the index of the
- // column in the model is the last element.
- TableColumn newColumn = new TableColumn(
- m.getColumnCount()-1, width, renderer, editor);
- super.addColumn(newColumn);
- return newColumn;
- }
-
- // Not possilble to make this work the same way ... change it so that
- // it does not delete columns from the model.
- public void removeColumn(Object columnIdentifier) {
- super.removeColumn(getColumn(columnIdentifier));
- }
-
- public void addRow(Object[] rowData) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).addRow(rowData);
- }
-
- public void addRow(List> rowData) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).addRow(rowData.toArray());
- }
-
- public void removeRow(int rowIndex) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).removeRow(rowIndex);
- }
-
- public void moveRow(int startIndex, int endIndex, int toIndex) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).moveRow(startIndex, endIndex, toIndex);
- }
-
- public void insertRow(int rowIndex, Object[] rowData) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).insertRow(rowIndex, rowData);
- }
-
- public void insertRow(int rowIndex, List> rowData) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).insertRow(rowIndex, rowData.toArray());
- }
-
- public void setNumRows(int newSize) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).setNumRows(newSize);
- }
-
- public void setDataVector(Object[][] newData, List> columnIds) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).setDataVector(
- newData, columnIds.toArray());
- }
-
- public void setDataVector(Object[][] newData, Object[] columnIds) {
- checkDefaultTableModel();
- ((DefaultTableModel)getModel()).setDataVector(newData, columnIds);
- }
-
- protected void checkDefaultTableModel() {
- if(!(dataModel instanceof DefaultTableModel))
- throw new InternalError("In order to use this method, the data model must be an instance of DefaultTableModel.");
- }
-
-//
-// Methods removed from JTable in the move from identifiers to ints.
-//
-
- public Object getValueAt(Object columnIdentifier, int rowIndex) {
- return super.getValueAt(rowIndex, getColumnIndex(columnIdentifier));
- }
-
- public boolean isCellEditable(Object columnIdentifier, int rowIndex) {
- return super.isCellEditable(rowIndex, getColumnIndex(columnIdentifier));
- }
-
- public void setValueAt(Object aValue, Object columnIdentifier, int rowIndex) {
- super.setValueAt(aValue, rowIndex, getColumnIndex(columnIdentifier));
- }
-
- public boolean editColumnRow(Object identifier, int row) {
- return super.editCellAt(row, getColumnIndex(identifier));
- }
-
- public void moveColumn(Object columnIdentifier, Object targetColumnIdentifier) {
- moveColumn(getColumnIndex(columnIdentifier),
- getColumnIndex(targetColumnIdentifier));
- }
-
- public boolean isColumnSelected(Object identifier) {
- return isColumnSelected(getColumnIndex(identifier));
- }
-
- public TableColumn addColumn(int modelColumn, int width) {
- return addColumn(modelColumn, width, null, null);
- }
-
- public TableColumn addColumn(int modelColumn) {
- return addColumn(modelColumn, 75, null, null);
- }
-
- /**
- * Creates a new column with modelColumn, width,
- * renderer, and editor and adds it to the end of
- * the JTable's array of columns. This method also retrieves the
- * name of the column using the model's getColumnName(modelColumn)
- * method, and sets the both the header value and the identifier
- * for this TableColumn accordingly.
- *
- * The modelColumn is the index of the column in the model which
- * will supply the data for this column in the table. This, like the
- * columnIdentifier in previous releases, does not change as the
- * columns are moved in the view.
- *
- * For the rest of the JTable API, and all of its associated classes,
- * columns are referred to in the co-ordinate system of the view, the
- * index of the column in the model is kept inside the TableColumn
- * and is used only to retrieve the information from the appropraite
- * column in the model.
- *
- *
- * @param modelColumn The index of the column in the model
- * @param width The new column's width. Or -1 to use
- * the default width
- * @param renderer The renderer used with the new column.
- * Or null to use the default renderer.
- * @param editor The editor used with the new column.
- * Or null to use the default editor.
- */
- public TableColumn addColumn(int modelColumn, int width,
- TableCellRenderer renderer,
- TableCellEditor editor) {
- TableColumn newColumn = new TableColumn(
- modelColumn, width, renderer, editor);
- addColumn(newColumn);
- return newColumn;
- }
-
-//
-// Methods that had their arguments switched.
-//
-
-// These won't work with the new table package.
-
-/*
- public Object getValueAt(int columnIndex, int rowIndex) {
- return super.getValueAt(rowIndex, columnIndex);
- }
-
- public boolean isCellEditable(int columnIndex, int rowIndex) {
- return super.isCellEditable(rowIndex, columnIndex);
- }
-
- public void setValueAt(Object aValue, int columnIndex, int rowIndex) {
- super.setValueAt(aValue, rowIndex, columnIndex);
- }
-*/
-
- public boolean editColumnRow(int columnIndex, int rowIndex) {
- return super.editCellAt(rowIndex, columnIndex);
- }
-
- public boolean editColumnRow(int columnIndex, int rowIndex, EventObject e){
- return super.editCellAt(rowIndex, columnIndex, e);
- }
-
-
-} // End Of Class OldJTable
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index b9252cc56ff..53fa4e3066c 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
// Copyright 2025 Arm Limited and/or its affiliates.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -1182,12 +1182,12 @@ class CallStubImpl {
public:
// Size of call trampoline stub.
static uint size_call_trampoline() {
- return 0; // no call trampolines on this platform
+ return MacroAssembler::max_trampoline_stub_size();
}
// number of relocations needed by a call trampoline stub
static uint reloc_call_trampoline() {
- return 0; // no call trampolines on this platform
+ return 5; // metadata; call dest; trampoline address; trampoline destination; trampoline_owner_metadata
}
};
@@ -1229,7 +1229,7 @@ public:
// predicate controlling addressing modes
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
- // Convert BootTest condition to Assembler condition.
+ // Convert BoolTest condition to Assembler condition.
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
Assembler::Condition to_assembler_cond(BoolTest::mask cond);
%}
@@ -2233,15 +2233,9 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
st->print_cr("# MachUEPNode");
- if (UseCompressedClassPointers) {
- st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
- st->print_cr("\tcmpw rscratch1, r10");
- } else {
- st->print_cr("\tldr rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tldr r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
- st->print_cr("\tcmp rscratch1, r10");
- }
+ st->print_cr("\tldrw rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+ st->print_cr("\tldrw r10, [rscratch2 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
+ st->print_cr("\tcmpw rscratch1, r10");
st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif
@@ -2467,11 +2461,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
return opnd->opcode() == VREG;
}
+#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
-// This function is used on startup to build the trampoline stubs in
-// generateOptoStub. Registers not mentioned will be killed by the VM
-// call in the trampoline, and arguments in those registers not be
-// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
return
@@ -2492,11 +2483,7 @@ bool Matcher::can_be_java_arg(int reg)
reg == V6_num || reg == V6_H_num ||
reg == V7_num || reg == V7_H_num;
}
-
-bool Matcher::is_spillable_arg(int reg)
-{
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -2531,10 +2518,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.size() : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
- return false;
-}
-
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask::EMPTY;
@@ -2579,7 +2562,7 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
return true;
}
-// Convert BootTest condition to Assembler condition.
+// Convert BoolTest condition to Assembler condition.
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
Assembler::Condition to_assembler_cond(BoolTest::mask cond) {
Assembler::Condition result;
@@ -3403,11 +3386,13 @@ encode %{
} else if (rtype == relocInfo::metadata_type) {
__ mov_metadata(dst_reg, (Metadata*)con);
} else {
- assert(rtype == relocInfo::none, "unexpected reloc type");
+ assert(rtype == relocInfo::none || rtype == relocInfo::external_word_type, "unexpected reloc type");
+ // load fake address constants using a normal move
if (! __ is_valid_AArch64_address(con) ||
con < (address)(uintptr_t)os::vm_page_size()) {
__ mov(dst_reg, con);
} else {
+ // no reloc so just use adrp and add
uint64_t offset;
__ adrp(dst_reg, con, offset);
__ add(dst_reg, dst_reg, offset);
@@ -3812,11 +3797,6 @@ frame %{
// Compiled code's Frame Pointer
frame_pointer(R31);
- // Interpreter stores its frame pointer in a register which is
- // stored to the stack by I2CAdaptors.
- // I2CAdaptors convert from interpreted java to compiled java.
- interpreter_frame_pointer(R29);
-
// Stack alignment requirement
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
@@ -4535,6 +4515,18 @@ operand immP_1()
interface(CONST_INTER);
%}
+// AOT Runtime Constants Address
+operand immAOTRuntimeConstantsAddress()
+%{
+ // Check if the address is in the range of AOT Runtime Constants
+ predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// Float and Double operands
// Double Immediate
operand immD()
@@ -6898,6 +6890,20 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con)
ins_pipe(ialu_imm);
%}
+instruct loadAOTRCAddress(iRegPNoSp dst, immAOTRuntimeConstantsAddress con)
+%{
+ match(Set dst con);
+
+ ins_cost(INSN_COST);
+ format %{ "adr $dst, $con\t# AOT Runtime Constants Address" %}
+
+ ins_encode %{
+ __ load_aotrc_address($dst$$Register, (address)$con$$constant);
+ %}
+
+ ins_pipe(ialu_imm);
+%}
+
// Load Narrow Pointer Constant
instruct loadConN(iRegNNoSp dst, immN con)
@@ -8008,6 +8014,21 @@ instruct membar_release_lock() %{
ins_pipe(pipe_serial);
%}
+instruct membar_storeload() %{
+ match(MemBarStoreLoad);
+ ins_cost(VOLATILE_REF_COST*100);
+
+ format %{ "MEMBAR-store-load\n\t"
+ "dmb ish" %}
+
+ ins_encode %{
+ __ block_comment("membar_storeload");
+ __ membar(Assembler::StoreLoad);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
instruct unnecessary_membar_volatile() %{
predicate(unnecessary_volatile(n));
match(MemBarVolatile);
@@ -8037,6 +8058,20 @@ instruct membar_volatile() %{
ins_pipe(pipe_serial);
%}
+instruct membar_full() %{
+ match(MemBarFull);
+ ins_cost(VOLATILE_REF_COST*100);
+
+ format %{ "membar_full\n\t"
+ "dmb ish" %}
+ ins_encode %{
+ __ block_comment("membar_full");
+ __ membar(Assembler::AnyAny);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
// ============================================================================
// Cast/Convert Instructions
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 78ef121bd29..4c854913e63 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2020, 2025, Arm Limited. All rights reserved.
+// Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2026, Arm Limited. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -201,6 +201,8 @@ source %{
case Op_XorReductionV:
case Op_MinReductionV:
case Op_MaxReductionV:
+ case Op_UMinReductionV:
+ case Op_UMaxReductionV:
// Reductions with less than 8 bytes vector length are
// not supported.
if (length_in_bytes < 8) {
@@ -245,10 +247,39 @@ source %{
case Op_MinVHF:
case Op_MaxVHF:
case Op_SqrtVHF:
+ if (UseSVE == 0 && !is_feat_fp16_supported()) {
+ return false;
+ }
+ break;
+ // At the time of writing this, the Vector API has no half-float (FP16) species.
+ // Consequently, AddReductionVHF and MulReductionVHF are only produced by the
+ // auto-vectorizer, which requires strictly ordered semantics for FP reductions.
+ //
+ // There is no direct Neon instruction that performs strictly ordered floating
+ // point add reduction. Hence, on Neon only machines, the add reduction operation
+ // is implemented as a scalarized sequence using half-precision scalar instruction
+ // FADD which requires FEAT_FP16 and ASIMDHP to be available on the target.
+ // On SVE machines (UseSVE > 0) however, there is a direct instruction (FADDA) which
+ // implements strictly ordered floating point add reduction which does not require
+ // the FEAT_FP16 and ASIMDHP checks as SVE supports half-precision floats by default.
+ case Op_AddReductionVHF:
// FEAT_FP16 is enabled if both "fphp" and "asimdhp" features are supported.
// Only the Neon instructions need this check. SVE supports half-precision floats
// by default.
- if (UseSVE == 0 && !is_feat_fp16_supported()) {
+ if (length_in_bytes < 8 || (UseSVE == 0 && !is_feat_fp16_supported())) {
+ return false;
+ }
+ break;
+ case Op_MulReductionVHF:
+ // There are no direct Neon/SVE instructions that perform strictly ordered
+ // floating point multiply reduction.
+  // For vector length <= 16 bytes, the reduction is implemented as a scalarized
+ // sequence using half-precision scalar instruction FMUL. This path requires
+ // FEAT_FP16 and ASIMDHP to be available on the target.
+ // For vector length > 16 bytes, this operation is disabled because there is no
+ // direct SVE instruction that performs a strictly ordered FP16 multiply
+ // reduction.
+ if (length_in_bytes < 8 || length_in_bytes > 16 || !is_feat_fp16_supported()) {
return false;
}
break;
@@ -298,6 +329,7 @@ source %{
case Op_VectorRearrange:
case Op_MulReductionVD:
case Op_MulReductionVF:
+ case Op_MulReductionVHF:
case Op_MulReductionVI:
case Op_MulReductionVL:
case Op_CompressBitsV:
@@ -362,6 +394,7 @@ source %{
case Op_VectorMaskCmp:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
+ case Op_AddReductionVHF:
case Op_AddReductionVF:
case Op_AddReductionVD:
case Op_AndReductionV:
@@ -383,6 +416,8 @@ source %{
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
+ case Op_UMinReductionV:
+ case Op_UMaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
@@ -593,13 +628,9 @@ instruct vloadcon(vReg dst, immI0 src) %{
BasicType bt = Matcher::vector_element_basic_type(this);
if (UseSVE == 0) {
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+ int entry_idx = __ vector_iota_entry_index(bt);
assert(length_in_bytes <= 16, "must be");
- // The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 16.
- int offset = exact_log2(type2aelembytes(bt)) << 4;
- if (is_floating_point_type(bt)) {
- offset += 32;
- }
- __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + offset));
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices(entry_idx)));
if (length_in_bytes == 16) {
__ ldrq($dst$$FloatRegister, rscratch1);
} else {
@@ -3402,6 +3433,44 @@ instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vR
ins_pipe(pipe_slow);
%}
+// Add Reduction for Half floats (FP16).
+// Neon does not provide direct instructions for strictly ordered floating-point add reductions.
+// On Neon-only targets (UseSVE = 0), this operation is implemented as a sequence of scalar additions:
+// each lane of the source vector is extracted in turn and its value is accumulated
+// into the running sum, producing a final scalar result.
+instruct reduce_addHF_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
+ predicate(UseSVE == 0);
+ match(Set dst (AddReductionVHF fsrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "reduce_addHF $dst, $fsrc, $vsrc\t# 4HF/8HF. KILL $tmp" %}
+ ins_encode %{
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ __ neon_reduce_add_fp16($dst$$FloatRegister, $fsrc$$FloatRegister,
+ $vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// This rule calculates the reduction result in strict order. Two cases will
+// reach here:
+// 1. Non strictly-ordered AddReductionVHF when vector size > 128-bits. For example -
+// AddReductionVHF generated by Vector API. For vector size > 128-bits, it is more
+// beneficial performance-wise to generate direct SVE instruction even if it is
+// strictly ordered.
+// 2. Strictly-ordered AddReductionVHF. For example - AddReductionVHF generated by
+// auto-vectorization on SVE machine.
+instruct reduce_addHF_sve(vRegF dst_src1, vReg src2) %{
+ predicate(UseSVE > 0);
+ match(Set dst_src1 (AddReductionVHF dst_src1 src2));
+ format %{ "reduce_addHF_sve $dst_src1, $dst_src1, $src2" %}
+ ins_encode %{
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2);
+ assert(length_in_bytes == MaxVectorSize, "invalid vector length");
+ __ sve_fadda($dst_src1$$FloatRegister, __ H, ptrue, $src2$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// This rule calculates the reduction result in strict order. Two cases will
// reach here:
// 1. Non strictly-ordered AddReductionVF when vector size > 128-bits. For example -
@@ -3492,12 +3561,14 @@ instruct reduce_addL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc, pRegGov pg, vR
ins_pipe(pipe_slow);
%}
-instruct reduce_addF_masked(vRegF dst_src1, vReg src2, pRegGov pg) %{
+instruct reduce_addFHF_masked(vRegF dst_src1, vReg src2, pRegGov pg) %{
predicate(UseSVE > 0);
+ match(Set dst_src1 (AddReductionVHF (Binary dst_src1 src2) pg));
match(Set dst_src1 (AddReductionVF (Binary dst_src1 src2) pg));
- format %{ "reduce_addF_masked $dst_src1, $pg, $dst_src1, $src2" %}
+ format %{ "reduce_addFHF_masked $dst_src1, $pg, $dst_src1, $src2" %}
ins_encode %{
- __ sve_fadda($dst_src1$$FloatRegister, __ S,
+ BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+ __ sve_fadda($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
$pg$$PRegister, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -3545,14 +3616,17 @@ instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
ins_pipe(pipe_slow);
%}
-instruct reduce_mulF(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
+
+instruct reduce_mulFHF(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
predicate(Matcher::vector_length_in_bytes(n->in(2)) <= 16);
+ match(Set dst (MulReductionVHF fsrc vsrc));
match(Set dst (MulReductionVF fsrc vsrc));
effect(TEMP_DEF dst, TEMP tmp);
- format %{ "reduce_mulF $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
+ format %{ "reduce_mulFHF $dst, $fsrc, $vsrc\t# 2F/4F/4HF/8HF. KILL $tmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
- __ neon_reduce_mul_fp($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ __ neon_reduce_mul_fp($dst$$FloatRegister, bt, $fsrc$$FloatRegister,
$vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
%}
ins_pipe(pipe_slow);
@@ -4218,6 +4292,224 @@ instruct reduce_minD_masked(vRegD dst, vRegD dsrc, vReg vsrc, pRegGov pg) %{
ins_pipe(pipe_slow);
%}
+// -------------------- Vector reduction unsigned min/max ----------------------
+
+// reduction uminI
+
+instruct reduce_uminI_neon(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
+ vReg tmp, rFlagsReg cr) %{
+ predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) &&
+ (Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_INT));
+ match(Set dst (UMinReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_uminI_neon $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ __ neon_reduce_minmax_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ length_in_bytes, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_uminI_sve(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) &&
+ (Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_INT));
+ match(Set dst (UMinReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_uminI_sve $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ assert(UseSVE > 0, "must be sve");
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ assert(length_in_bytes == MaxVectorSize, "invalid vector length");
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ ptrue, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// reduction uminL
+
+instruct reduce_uminL_neon(iRegLNoSp dst, iRegL isrc, vReg vsrc, rFlagsReg cr) %{
+ predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
+ match(Set dst (UMinReductionV isrc vsrc));
+ effect(TEMP_DEF dst, KILL cr);
+ format %{ "reduce_uminL_neon $dst, $isrc, $vsrc\t# 2L. KILL cr" %}
+ ins_encode %{
+ __ neon_reduce_minmax_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ /* vector_length_in_bytes */ 16, fnoreg);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_uminL_sve(iRegLNoSp dst, iRegL isrc, vReg vsrc,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
+ match(Set dst (UMinReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_uminL_sve $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ assert(length_in_bytes == MaxVectorSize, "invalid vector length");
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ ptrue, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// reduction umin - predicated
+
+instruct reduce_uminI_masked(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc, pRegGov pg,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 &&
+ (Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_INT));
+ match(Set dst (UMinReductionV (Binary isrc vsrc) pg));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_uminI_masked $dst, $isrc, $pg, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ $pg$$PRegister, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_uminL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc, pRegGov pg,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_LONG);
+ match(Set dst (UMinReductionV (Binary isrc vsrc) pg));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_uminL_masked $dst, $isrc, $pg, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ $pg$$PRegister, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// reduction umaxI
+
+instruct reduce_umaxI_neon(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
+ vReg tmp, rFlagsReg cr) %{
+ predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) &&
+ (Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_INT));
+ match(Set dst (UMaxReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_umaxI_neon $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ __ neon_reduce_minmax_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ length_in_bytes, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_umaxI_sve(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) &&
+ (Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(2)) == T_INT));
+ match(Set dst (UMaxReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_umaxI_sve $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ assert(UseSVE > 0, "must be sve");
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ assert(length_in_bytes == MaxVectorSize, "invalid vector length");
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ ptrue, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// reduction umaxL
+
+instruct reduce_umaxL_neon(iRegLNoSp dst, iRegL isrc, vReg vsrc, rFlagsReg cr) %{
+ predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
+ match(Set dst (UMaxReductionV isrc vsrc));
+ effect(TEMP_DEF dst, KILL cr);
+ format %{ "reduce_umaxL_neon $dst, $isrc, $vsrc\t# 2L. KILL cr" %}
+ ins_encode %{
+ __ neon_reduce_minmax_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ /* vector_length_in_bytes */ 16, fnoreg);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_umaxL_sve(iRegLNoSp dst, iRegL isrc, vReg vsrc,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
+ match(Set dst (UMaxReductionV isrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_umaxL_sve $dst, $isrc, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ assert(length_in_bytes == MaxVectorSize, "invalid vector length");
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, T_LONG,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ ptrue, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// reduction umax - predicated
+
+instruct reduce_umaxI_masked(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc, pRegGov pg,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 &&
+ (Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_BYTE ||
+ Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_SHORT ||
+ Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_INT));
+ match(Set dst (UMaxReductionV (Binary isrc vsrc) pg));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_umaxI_masked $dst, $isrc, $pg, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ $pg$$PRegister, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_umaxL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc, pRegGov pg,
+ vRegD tmp, rFlagsReg cr) %{
+ predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n->in(1)->in(2)) == T_LONG);
+ match(Set dst (UMaxReductionV (Binary isrc vsrc) pg));
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "reduce_umaxL_masked $dst, $isrc, $pg, $vsrc\t# KILL $tmp, cr" %}
+ ins_encode %{
+ BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ __ sve_reduce_integral(this->ideal_Opcode(), $dst$$Register, bt,
+ $isrc$$Register, $vsrc$$FloatRegister,
+ $pg$$PRegister, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// ------------------------------ Vector reinterpret ---------------------------
instruct reinterpret_same_size(vReg dst_src) %{
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 66dc22c3758..58ed234194a 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2020, 2025, Arm Limited. All rights reserved.
+// Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2026, Arm Limited. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -191,6 +191,8 @@ source %{
case Op_XorReductionV:
case Op_MinReductionV:
case Op_MaxReductionV:
+ case Op_UMinReductionV:
+ case Op_UMaxReductionV:
// Reductions with less than 8 bytes vector length are
// not supported.
if (length_in_bytes < 8) {
@@ -235,10 +237,39 @@ source %{
case Op_MinVHF:
case Op_MaxVHF:
case Op_SqrtVHF:
+ if (UseSVE == 0 && !is_feat_fp16_supported()) {
+ return false;
+ }
+ break;
+ // At the time of writing this, the Vector API has no half-float (FP16) species.
+ // Consequently, AddReductionVHF and MulReductionVHF are only produced by the
+ // auto-vectorizer, which requires strictly ordered semantics for FP reductions.
+ //
+ // There is no direct Neon instruction that performs strictly ordered floating
+ // point add reduction. Hence, on Neon only machines, the add reduction operation
+ // is implemented as a scalarized sequence using half-precision scalar instruction
+ // FADD which requires FEAT_FP16 and ASIMDHP to be available on the target.
+ // On SVE machines (UseSVE > 0) however, there is a direct instruction (FADDA) which
+ // implements strictly ordered floating point add reduction which does not require
+ // the FEAT_FP16 and ASIMDHP checks as SVE supports half-precision floats by default.
+ case Op_AddReductionVHF:
// FEAT_FP16 is enabled if both "fphp" and "asimdhp" features are supported.
// Only the Neon instructions need this check. SVE supports half-precision floats
// by default.
- if (UseSVE == 0 && !is_feat_fp16_supported()) {
+ if (length_in_bytes < 8 || (UseSVE == 0 && !is_feat_fp16_supported())) {
+ return false;
+ }
+ break;
+ case Op_MulReductionVHF:
+ // There are no direct Neon/SVE instructions that perform strictly ordered
+ // floating point multiply reduction.
+  // For vector length <= 16 bytes, the reduction is implemented as a scalarized
+ // sequence using half-precision scalar instruction FMUL. This path requires
+ // FEAT_FP16 and ASIMDHP to be available on the target.
+ // For vector length > 16 bytes, this operation is disabled because there is no
+ // direct SVE instruction that performs a strictly ordered FP16 multiply
+ // reduction.
+ if (length_in_bytes < 8 || length_in_bytes > 16 || !is_feat_fp16_supported()) {
return false;
}
break;
@@ -288,6 +319,7 @@ source %{
case Op_VectorRearrange:
case Op_MulReductionVD:
case Op_MulReductionVF:
+ case Op_MulReductionVHF:
case Op_MulReductionVI:
case Op_MulReductionVL:
case Op_CompressBitsV:
@@ -352,6 +384,7 @@ source %{
case Op_VectorMaskCmp:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
+ case Op_AddReductionVHF:
case Op_AddReductionVF:
case Op_AddReductionVD:
case Op_AndReductionV:
@@ -373,6 +406,8 @@ source %{
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
+ case Op_UMinReductionV:
+ case Op_UMaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
@@ -2059,6 +2094,25 @@ instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vR
ins_pipe(pipe_slow);
%}
dnl
+
+// Add Reduction for Half floats (FP16).
+// Neon does not provide direct instructions for strictly ordered floating-point add reductions.
+// On Neon-only targets (UseSVE = 0), this operation is implemented as a sequence of scalar additions:
+// each lane of the source vector is extracted in turn and its value is accumulated
+// into the running sum, producing a final scalar result.
+instruct reduce_addHF_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
+ predicate(UseSVE == 0);
+ match(Set dst (AddReductionVHF fsrc vsrc));
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "reduce_addHF $dst, $fsrc, $vsrc\t# 4HF/8HF. KILL $tmp" %}
+ ins_encode %{
+ uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ __ neon_reduce_add_fp16($dst$$FloatRegister, $fsrc$$FloatRegister,
+ $vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+dnl
dnl REDUCE_ADD_FP_SVE($1, $2 )
dnl REDUCE_ADD_FP_SVE(type, size)
define(`REDUCE_ADD_FP_SVE', `
@@ -2070,21 +2124,26 @@ define(`REDUCE_ADD_FP_SVE', `
// strictly ordered.
// 2. Strictly-ordered AddReductionV$1. For example - AddReductionV$1 generated by
// auto-vectorization on SVE machine.
-instruct reduce_add$1_sve(vReg$1 dst_src1, vReg src2) %{
- predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) ||
- n->as_Reduction()->requires_strict_order());
+instruct reduce_add$1_sve(vReg`'ifelse($1, HF, F, $1) dst_src1, vReg src2) %{
+ ifelse($1, HF,
+ `predicate(UseSVE > 0);',
+ `predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) ||
+ n->as_Reduction()->requires_strict_order());')
match(Set dst_src1 (AddReductionV$1 dst_src1 src2));
format %{ "reduce_add$1_sve $dst_src1, $dst_src1, $src2" %}
ins_encode %{
- assert(UseSVE > 0, "must be sve");
- uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2);
+ ifelse($1, HF, `',
+ `assert(UseSVE > 0, "must be sve");
+ ')dnl
+uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ sve_fadda($dst_src1$$FloatRegister, __ $2, ptrue, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
-REDUCE_ADD_FP_SVE(F, S)
+REDUCE_ADD_FP_SVE(HF, H)
+REDUCE_ADD_FP_SVE(F, S)
// reduction addD
@@ -2125,21 +2184,30 @@ dnl
dnl REDUCE_ADD_FP_PREDICATE($1, $2 )
dnl REDUCE_ADD_FP_PREDICATE(insn_name, op_name)
define(`REDUCE_ADD_FP_PREDICATE', `
-instruct reduce_add$1_masked(vReg$1 dst_src1, vReg src2, pRegGov pg) %{
+instruct reduce_add$1_masked(vReg$2 dst_src1, vReg src2, pRegGov pg) %{
predicate(UseSVE > 0);
- match(Set dst_src1 (AddReductionV$1 (Binary dst_src1 src2) pg));
+ ifelse($2, F,
+ `match(Set dst_src1 (AddReductionVHF (Binary dst_src1 src2) pg));
+ match(Set dst_src1 (AddReductionV$2 (Binary dst_src1 src2) pg));',
+ `match(Set dst_src1 (AddReductionV$2 (Binary dst_src1 src2) pg));')
format %{ "reduce_add$1_masked $dst_src1, $pg, $dst_src1, $src2" %}
ins_encode %{
- __ sve_fadda($dst_src1$$FloatRegister, __ $2,
- $pg$$PRegister, $src2$$FloatRegister);
+ ifelse($2, F,
+ `BasicType bt = Matcher::vector_element_basic_type(this, $src2);
+ ',)dnl
+ifelse($2, F,
+ `__ sve_fadda($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+ $pg$$PRegister, $src2$$FloatRegister);',
+ `__ sve_fadda($dst_src1$$FloatRegister, __ $2,
+ $pg$$PRegister, $src2$$FloatRegister);')
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
REDUCE_ADD_INT_PREDICATE(I, iRegIorL2I)
REDUCE_ADD_INT_PREDICATE(L, iRegL)
-REDUCE_ADD_FP_PREDICATE(F, S)
-REDUCE_ADD_FP_PREDICATE(D, D)
+REDUCE_ADD_FP_PREDICATE(FHF, F)
+REDUCE_ADD_FP_PREDICATE(D, D)
// ------------------------------ Vector reduction mul -------------------------
@@ -2172,30 +2240,37 @@ instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc) %{
ins_pipe(pipe_slow);
%}
-instruct reduce_mulF(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{
- predicate(Matcher::vector_length_in_bytes(n->in(2)) <= 16);
- match(Set dst (MulReductionVF fsrc vsrc));
+dnl REDUCE_MUL_FP($1, $2 )
+dnl REDUCE_MUL_FP(insn_name, op_name)
+define(`REDUCE_MUL_FP', `
+instruct reduce_mul$1(vReg$2 dst, vReg$2 ifelse($2, F, fsrc, dsrc), vReg vsrc, vReg tmp) %{
+ predicate(Matcher::vector_length_in_bytes(n->in(2)) ifelse($2, F, <=, ==) 16);
+ ifelse($2, F,
+ `match(Set dst (MulReductionVHF fsrc vsrc));
+ match(Set dst (MulReductionV$2 fsrc vsrc));',
+ `match(Set dst (MulReductionV$2 dsrc vsrc));')
effect(TEMP_DEF dst, TEMP tmp);
- format %{ "reduce_mulF $dst, $fsrc, $vsrc\t# 2F/4F. KILL $tmp" %}
+ ifelse($2, F,
+ `format %{ "reduce_mul$1 $dst, $fsrc, $vsrc\t# 2F/4F/4HF/8HF. KILL $tmp" %}',
+ `format %{ "reduce_mul$1 $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}')
ins_encode %{
- uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
- __ neon_reduce_mul_fp($dst$$FloatRegister, T_FLOAT, $fsrc$$FloatRegister,
- $vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);
+ ifelse($2, F,
+ `uint length_in_bytes = Matcher::vector_length_in_bytes(this, $vsrc);
+ ',)dnl
+ifelse($2, F,
+ `BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
+ ',)dnl
+ifelse($2, F,
+ `__ neon_reduce_mul_fp($dst$$FloatRegister, bt, $fsrc$$FloatRegister,
+ $vsrc$$FloatRegister, length_in_bytes, $tmp$$FloatRegister);',
+ `__ neon_reduce_mul_fp($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
+ $vsrc$$FloatRegister, 16, $tmp$$FloatRegister);')
%}
ins_pipe(pipe_slow);
-%}
-
-instruct reduce_mulD(vRegD dst, vRegD dsrc, vReg vsrc, vReg tmp) %{
- predicate(Matcher::vector_length_in_bytes(n->in(2)) == 16);
- match(Set dst (MulReductionVD dsrc vsrc));
- effect(TEMP_DEF dst, TEMP tmp);
- format %{ "reduce_mulD $dst, $dsrc, $vsrc\t# 2D. KILL $tmp" %}
- ins_encode %{
- __ neon_reduce_mul_fp($dst$$FloatRegister, T_DOUBLE, $dsrc$$FloatRegister,
- $vsrc$$FloatRegister, 16, $tmp$$FloatRegister);
- %}
- ins_pipe(pipe_slow);
-%}
+%}')dnl
+dnl
+REDUCE_MUL_FP(FHF, F)
+REDUCE_MUL_FP(D, D)
dnl
dnl REDUCE_BITWISE_OP_NEON($1, $2 $3 $4 )
@@ -2505,6 +2580,32 @@ REDUCE_MAXMIN_INT_PREDICATE(min, L, iRegL, MinReductionV)
REDUCE_MAXMIN_FP_PREDICATE(min, F, fsrc, MinReductionV, sve_fminv, fmins)
REDUCE_MAXMIN_FP_PREDICATE(min, D, dsrc, MinReductionV, sve_fminv, fmind)
+// -------------------- Vector reduction unsigned min/max ----------------------
+
+// reduction uminI
+REDUCE_MAXMIN_I_NEON(umin, UMinReductionV)
+REDUCE_MAXMIN_I_SVE(umin, UMinReductionV)
+
+// reduction uminL
+REDUCE_MAXMIN_L_NEON(umin, UMinReductionV)
+REDUCE_MAXMIN_L_SVE(umin, UMinReductionV)
+
+// reduction umin - predicated
+REDUCE_MAXMIN_INT_PREDICATE(umin, I, iRegIorL2I, UMinReductionV)
+REDUCE_MAXMIN_INT_PREDICATE(umin, L, iRegL, UMinReductionV)
+
+// reduction umaxI
+REDUCE_MAXMIN_I_NEON(umax, UMaxReductionV)
+REDUCE_MAXMIN_I_SVE(umax, UMaxReductionV)
+
+// reduction umaxL
+REDUCE_MAXMIN_L_NEON(umax, UMaxReductionV)
+REDUCE_MAXMIN_L_SVE(umax, UMaxReductionV)
+
+// reduction umax - predicated
+REDUCE_MAXMIN_INT_PREDICATE(umax, I, iRegIorL2I, UMaxReductionV)
+REDUCE_MAXMIN_INT_PREDICATE(umax, L, iRegL, UMaxReductionV)
+
// ------------------------------ Vector reinterpret ---------------------------
instruct reinterpret_same_size(vReg dst_src) %{
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 18807c667e3..4c1c8d9bbc8 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -1000,30 +1000,6 @@ public:
f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
}
-#define INSN(NAME, cond) \
- void NAME(address dest) { \
- br(cond, dest); \
- }
-
- INSN(beq, EQ);
- INSN(bne, NE);
- INSN(bhs, HS);
- INSN(bcs, CS);
- INSN(blo, LO);
- INSN(bcc, CC);
- INSN(bmi, MI);
- INSN(bpl, PL);
- INSN(bvs, VS);
- INSN(bvc, VC);
- INSN(bhi, HI);
- INSN(bls, LS);
- INSN(bge, GE);
- INSN(blt, LT);
- INSN(bgt, GT);
- INSN(ble, LE);
- INSN(bal, AL);
- INSN(bnv, NV);
-
void br(Condition cc, Label &L);
#undef INSN
@@ -1095,6 +1071,10 @@ public:
#undef INSN
+ void wfet(Register rt) {
+ system(0b00, 0b011, 0b0001, 0b0000, 0b000, rt);
+ }
+
// we only provide mrs and msr for the special purpose system
// registers where op1 (instr[20:19]) == 11
// n.b msr has L (instr[21]) == 0 mrs has L == 1
@@ -2658,6 +2638,8 @@ template
INSN(uminv, 1, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(umaxp, 1, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(uminp, 1, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(sqdmulh,0, 0b101101, false); // accepted arrangements: T4H, T8H, T2S, T4S
INSN(shsubv, 0, 0b001001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
@@ -3490,7 +3472,9 @@ public:
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
INSN(sve_umax, 0b00000100, 0b001001000); // unsigned maximum vectors
+ INSN(sve_umaxv, 0b00000100, 0b001001001); // unsigned maximum reduction to scalar
INSN(sve_umin, 0b00000100, 0b001011000); // unsigned minimum vectors
+ INSN(sve_uminv, 0b00000100, 0b001011001); // unsigned minimum reduction to scalar
#undef INSN
// SVE floating-point arithmetic - predicate
@@ -3810,8 +3794,8 @@ public:
}
private:
- void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
- bool isMerge, bool isFloat) {
+ void _sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
+ bool isMerge, bool isFloat) {
starti;
assert(T != Q, "invalid size");
int sh = 0;
@@ -3835,11 +3819,11 @@ private:
public:
// SVE copy signed integer immediate to vector elements (predicated)
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
- sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
+ _sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
}
// SVE copy floating-point immediate to vector elements (predicated)
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, double d) {
- sve_cpy(Zd, T, Pg, checked_cast(pack(d)), /*isMerge*/true, /*isFloat*/true);
+ _sve_cpy(Zd, T, Pg, checked_cast(pack(d)), /*isMerge*/true, /*isFloat*/true);
}
// SVE conditionally select elements from two vectors
@@ -4325,6 +4309,7 @@ public:
#undef INSN
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+ MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
}
// Stack overflow checking
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 37a6a130e0d..4de6237304d 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -33,6 +33,7 @@
#include "c1/c1_ValueStack.hpp"
#include "ci/ciArrayKlass.hpp"
#include "ci/ciInstance.hpp"
+#include "code/aotCodeCache.hpp"
#include "code/compiledIC.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gc_globals.hpp"
@@ -41,6 +42,7 @@
#include "runtime/frame.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/threadIdentifier.hpp"
#include "utilities/powerOfTwo.hpp"
#include "vmreg_aarch64.inline.hpp"
@@ -58,22 +60,6 @@ const Register SHIFT_count = r0; // where count for shift operations must be
#define __ _masm->
-static void select_different_registers(Register preserve,
- Register extra,
- Register &tmp1,
- Register &tmp2) {
- if (tmp1 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp1 = extra;
- } else if (tmp2 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp2 = extra;
- }
- assert_different_registers(preserve, tmp1, tmp2);
-}
-
-
-
static void select_different_registers(Register preserve,
Register extra,
Register &tmp1,
@@ -532,6 +518,19 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
case T_LONG: {
assert(patch_code == lir_patch_none, "no patching handled here");
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ address b = c->as_pointer();
+ if (b == (address)ThreadIdentifier::unsafe_offset()) {
+ __ lea(dest->as_register_lo(), ExternalAddress(b));
+ break;
+ }
+ if (AOTRuntimeConstants::contains(b)) {
+ __ load_aotrc_address(dest->as_register_lo(), b);
+ break;
+ }
+ }
+#endif
__ mov(dest->as_register_lo(), (intptr_t)c->as_jlong());
break;
}
@@ -1218,43 +1217,11 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
__ bind(*op->stub()->continuation());
}
-void LIR_Assembler::type_profile_helper(Register mdo,
- ciMethodData *md, ciProfileData *data,
- Register recv, Label* update_done) {
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md,
+ ciProfileData *data, Register recv) {
- // Given a profile data offset, generate an Address which points to
- // the corresponding slot in mdo->data().
- // Clobbers rscratch2.
- auto slot_at = [=](ByteSize offset) -> Address {
- return __ form_address(rscratch2, mdo,
- md->byte_offset_of_slot(data, offset),
- LogBytesPerWord);
- };
-
- for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
- Label next_test;
- // See if the receiver is receiver[n].
- __ ldr(rscratch1, slot_at(ReceiverTypeData::receiver_offset(i)));
- __ cmp(recv, rscratch1);
- __ br(Assembler::NE, next_test);
- __ addptr(slot_at(ReceiverTypeData::receiver_count_offset(i)),
- DataLayout::counter_increment);
- __ b(*update_done);
- __ bind(next_test);
- }
-
- // Didn't find receiver; find next empty slot and fill it in
- for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
- Label next_test;
- Address recv_addr(slot_at(ReceiverTypeData::receiver_offset(i)));
- __ ldr(rscratch1, recv_addr);
- __ cbnz(rscratch1, next_test);
- __ str(recv, recv_addr);
- __ mov(rscratch1, DataLayout::counter_increment);
- __ str(rscratch1, slot_at(ReceiverTypeData::receiver_count_offset(i)));
- __ b(*update_done);
- __ bind(next_test);
- }
+ int mdp_offset = md->byte_offset_of_slot(data, in_ByteSize(0));
+ __ profile_receiver_type(recv, mdo, mdp_offset);
}
void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
@@ -1291,12 +1258,9 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
} else if (obj == klass_RInfo) {
klass_RInfo = dst;
}
- if (k->is_loaded() && !UseCompressedClassPointers) {
- select_different_registers(obj, dst, k_RInfo, klass_RInfo);
- } else {
- Rtmp1 = op->tmp3()->as_register();
- select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
- }
+
+ Rtmp1 = op->tmp3()->as_register();
+ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
assert_different_registers(obj, k_RInfo, klass_RInfo);
@@ -1316,14 +1280,9 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
__ b(*obj_is_null);
__ bind(not_null);
- Label update_done;
Register recv = k_RInfo;
__ load_klass(recv, obj);
- type_profile_helper(mdo, md, data, recv, &update_done);
- Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- __ addptr(counter_addr, DataLayout::counter_increment);
-
- __ bind(update_done);
+ type_profile_helper(mdo, md, data, recv);
} else {
__ cbz(obj, *obj_is_null);
}
@@ -1430,13 +1389,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ b(done);
__ bind(not_null);
- Label update_done;
Register recv = k_RInfo;
__ load_klass(recv, value);
- type_profile_helper(mdo, md, data, recv, &update_done);
- Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- __ addptr(counter_addr, DataLayout::counter_increment);
- __ bind(update_done);
+ type_profile_helper(mdo, md, data, recv);
} else {
__ cbz(value, done);
}
@@ -2540,13 +2495,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
if (C1OptimizeVirtualCallProfiling && known_klass != nullptr) {
// We know the type that will be seen at this call site; we can
// statically update the MethodData* rather than needing to do
- // dynamic tests on the receiver type
-
- // NOTE: we should probably put a lock around this search to
- // avoid collisions by concurrent compilations
+ // dynamic tests on the receiver type.
ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
- uint i;
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ for (uint i = 0; i < VirtualCallData::row_limit(); i++) {
ciKlass* receiver = vc_data->receiver(i);
if (known_klass->equals(receiver)) {
Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
@@ -2554,36 +2505,13 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
return;
}
}
-
- // Receiver type not found in profile data; select an empty slot
-
- // Note that this is less efficient than it should be because it
- // always does a write to the receiver part of the
- // VirtualCallData rather than just the first time
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
- ciKlass* receiver = vc_data->receiver(i);
- if (receiver == nullptr) {
- __ mov_metadata(rscratch1, known_klass->constant_encoding());
- Address recv_addr =
- __ form_address(rscratch2, mdo,
- md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)),
- LogBytesPerWord);
- __ str(rscratch1, recv_addr);
- Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
- __ addptr(data_addr, DataLayout::counter_increment);
- return;
- }
- }
+ // Receiver type is not found in profile data.
+ // Fall back to runtime helper to handle the rest at runtime.
+ __ mov_metadata(recv, known_klass->constant_encoding());
} else {
__ load_klass(recv, recv);
- Label update_done;
- type_profile_helper(mdo, md, data, recv, &update_done);
- // Receiver did not match any saved receiver and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- __ addptr(counter_addr, DataLayout::counter_increment);
-
- __ bind(update_done);
}
+ type_profile_helper(mdo, md, data, recv);
} else {
// Static call
__ addptr(counter_addr, DataLayout::counter_increment);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
index 21916a5f7dd..5af06fc6a1c 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
@@ -50,9 +50,8 @@ friend class ArrayCopyStub;
Address stack_slot_address(int index, uint shift, Register tmp, int adjust = 0);
// Record the type of the receiver in ReceiverTypeData
- void type_profile_helper(Register mdo,
- ciMethodData *md, ciProfileData *data,
- Register recv, Label* update_done);
+ void type_profile_helper(Register mdo, ciMethodData *md,
+ ciProfileData *data, Register recv);
void add_debug_info_for_branch(address adr, CodeEmitInfo* info);
void casw(Register addr, Register newval, Register cmpval);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
index ad26d494b2d..f10c5197d91 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -1287,9 +1287,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ checkcast(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), info_for_exception, patching_info, stub,
@@ -1308,9 +1306,7 @@ void LIRGenerator::do_InstanceOf(InstanceOf* x) {
}
obj.load_item();
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ instanceof(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
index e934632715c..89a9422ea48 100644
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -105,12 +105,8 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
} else {
mov(t1, checked_cast(markWord::prototype().value()));
str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
- if (UseCompressedClassPointers) { // Take care not to kill klass
- encode_klass_not_null(t1, klass);
- strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
- } else {
- str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
- }
+ encode_klass_not_null(t1, klass); // Take care not to kill klass
+ strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
}
if (len->is_valid()) {
@@ -121,7 +117,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
// Clear gap/first 4 bytes following the length field.
strw(zr, Address(obj, base_offset));
}
- } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
+ } else if (!UseCompactObjectHeaders) {
store_klass_gap(obj, zr);
}
}
diff --git a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
index 938a64dd399..bb6b3ce907e 100644
--- a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
@@ -42,7 +42,6 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1500 );
define_pd_global(intx, OnStackReplacePercentage, 933 );
-define_pd_global(intx, NewSizeThreadIncrease, 4*K );
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
@@ -52,7 +51,6 @@ define_pd_global(bool, ProfileInterpreter, false);
define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 75897a16fe4..3c179f21c14 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2026 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,7 +31,9 @@
#include "opto/matcher.hpp"
#include "opto/output.hpp"
#include "opto/subnode.hpp"
+#include "runtime/objectMonitorTable.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
@@ -221,37 +224,52 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register t1,
if (!UseObjectMonitorTable) {
assert(t1_monitor == t1_mark, "should be the same here");
} else {
+ const Register t1_hash = t1;
Label monitor_found;
- // Load cache address
- lea(t3_t, Address(rthread, JavaThread::om_cache_oops_offset()));
+ // Save the mark, we might need it to extract the hash.
+ mov(t3, t1_mark);
- const int num_unrolled = 2;
+ // Look for the monitor in the om_cache.
+
+ ByteSize cache_offset = JavaThread::om_cache_oops_offset();
+ ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
+ const int num_unrolled = OMCache::CAPACITY;
for (int i = 0; i < num_unrolled; i++) {
- ldr(t1, Address(t3_t));
- cmp(obj, t1);
+ ldr(t1_monitor, Address(rthread, cache_offset + monitor_offset));
+ ldr(t2, Address(rthread, cache_offset));
+ cmp(obj, t2);
br(Assembler::EQ, monitor_found);
- increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
+ cache_offset = cache_offset + OMCache::oop_to_oop_difference();
}
- Label loop;
+ // Look for the monitor in the table.
- // Search for obj in cache.
- bind(loop);
+ // Get the hash code.
+ ubfx(t1_hash, t3, markWord::hash_shift, markWord::hash_bits);
- // Check for match.
- ldr(t1, Address(t3_t));
- cmp(obj, t1);
- br(Assembler::EQ, monitor_found);
+ // Get the table and calculate the bucket's address
+ lea(t3, ExternalAddress(ObjectMonitorTable::current_table_address()));
+ ldr(t3, Address(t3));
+ ldr(t2, Address(t3, ObjectMonitorTable::table_capacity_mask_offset()));
+ ands(t1_hash, t1_hash, t2);
+ ldr(t3, Address(t3, ObjectMonitorTable::table_buckets_offset()));
- // Search until null encountered, guaranteed _null_sentinel at end.
- increment(t3_t, in_bytes(OMCache::oop_to_oop_difference()));
- cbnz(t1, loop);
- // Cache Miss, NE set from cmp above, cbnz does not set flags
- b(slow_path);
+ // Read the monitor from the bucket.
+ ldr(t1_monitor, Address(t3, t1_hash, Address::lsl(LogBytesPerWord)));
+
+ // Check if the monitor in the bucket is special (empty, tombstone or removed).
+ cmp(t1_monitor, (unsigned char)ObjectMonitorTable::SpecialPointerValues::below_is_special);
+ br(Assembler::LO, slow_path);
+
+ // Check if object matches.
+ ldr(t3, Address(t1_monitor, ObjectMonitor::object_offset()));
+ BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs_asm->try_resolve_weak_handle_in_c2(this, t3, t2, slow_path);
+ cmp(t3, obj);
+ br(Assembler::NE, slow_path);
bind(monitor_found);
- ldr(t1_monitor, Address(t3_t, OMCache::oop_to_monitor_difference()));
}
const Register t2_owner_addr = t2;
@@ -1866,6 +1884,27 @@ void C2_MacroAssembler::neon_reduce_mul_fp(FloatRegister dst, BasicType bt,
BLOCK_COMMENT("neon_reduce_mul_fp {");
switch(bt) {
+ // The T_SHORT type below is for Float16 type which also uses floating-point
+ // instructions.
+ case T_SHORT:
+ fmulh(dst, fsrc, vsrc);
+ ext(vtmp, T8B, vsrc, vsrc, 2);
+ fmulh(dst, dst, vtmp);
+ ext(vtmp, T8B, vsrc, vsrc, 4);
+ fmulh(dst, dst, vtmp);
+ ext(vtmp, T8B, vsrc, vsrc, 6);
+ fmulh(dst, dst, vtmp);
+ if (isQ) {
+ ext(vtmp, T16B, vsrc, vsrc, 8);
+ fmulh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 10);
+ fmulh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 12);
+ fmulh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 14);
+ fmulh(dst, dst, vtmp);
+ }
+ break;
case T_FLOAT:
fmuls(dst, fsrc, vsrc);
ins(vtmp, S, vsrc, 0, 1);
@@ -1890,6 +1929,33 @@ void C2_MacroAssembler::neon_reduce_mul_fp(FloatRegister dst, BasicType bt,
BLOCK_COMMENT("} neon_reduce_mul_fp");
}
+// Vector reduction add for half float type with ASIMD instructions.
+void C2_MacroAssembler::neon_reduce_add_fp16(FloatRegister dst, FloatRegister fsrc, FloatRegister vsrc,
+ unsigned vector_length_in_bytes, FloatRegister vtmp) {
+ assert(vector_length_in_bytes == 8 || vector_length_in_bytes == 16, "unsupported");
+ bool isQ = vector_length_in_bytes == 16;
+
+ BLOCK_COMMENT("neon_reduce_add_fp16 {");
+ faddh(dst, fsrc, vsrc);
+ ext(vtmp, T8B, vsrc, vsrc, 2);
+ faddh(dst, dst, vtmp);
+ ext(vtmp, T8B, vsrc, vsrc, 4);
+ faddh(dst, dst, vtmp);
+ ext(vtmp, T8B, vsrc, vsrc, 6);
+ faddh(dst, dst, vtmp);
+ if (isQ) {
+ ext(vtmp, T16B, vsrc, vsrc, 8);
+ faddh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 10);
+ faddh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 12);
+ faddh(dst, dst, vtmp);
+ ext(vtmp, T16B, vsrc, vsrc, 14);
+ faddh(dst, dst, vtmp);
+ }
+ BLOCK_COMMENT("} neon_reduce_add_fp16");
+}
+
// Helper to select logical instruction
void C2_MacroAssembler::neon_reduce_logical_helper(int opc, bool is64, Register Rd,
Register Rn, Register Rm,
@@ -1960,50 +2026,76 @@ void C2_MacroAssembler::neon_reduce_logical(int opc, Register dst, BasicType bt,
BLOCK_COMMENT("} neon_reduce_logical");
}
-// Vector reduction min/max for integral type with ASIMD instructions.
+// Helper function to decode min/max reduction operation properties
+void C2_MacroAssembler::decode_minmax_reduction_opc(int opc, bool* is_min,
+ bool* is_unsigned,
+ Condition* cond) {
+ switch(opc) {
+ case Op_MinReductionV:
+ *is_min = true; *is_unsigned = false; *cond = LT; break;
+ case Op_MaxReductionV:
+ *is_min = false; *is_unsigned = false; *cond = GT; break;
+ case Op_UMinReductionV:
+ *is_min = true; *is_unsigned = true; *cond = LO; break;
+ case Op_UMaxReductionV:
+ *is_min = false; *is_unsigned = true; *cond = HI; break;
+ default:
+ ShouldNotReachHere();
+ }
+}
+
+// Vector reduction min/max/umin/umax for integral type with ASIMD instructions.
// Note: vtmp is not used and expected to be fnoreg for T_LONG case.
// Clobbers: rscratch1, rflags
void C2_MacroAssembler::neon_reduce_minmax_integral(int opc, Register dst, BasicType bt,
Register isrc, FloatRegister vsrc,
unsigned vector_length_in_bytes,
FloatRegister vtmp) {
- assert(opc == Op_MinReductionV || opc == Op_MaxReductionV, "unsupported");
+ assert(opc == Op_MinReductionV || opc == Op_MaxReductionV ||
+ opc == Op_UMinReductionV || opc == Op_UMaxReductionV, "unsupported");
assert(vector_length_in_bytes == 8 || vector_length_in_bytes == 16, "unsupported");
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported");
assert_different_registers(dst, isrc);
bool isQ = vector_length_in_bytes == 16;
- bool is_min = opc == Op_MinReductionV;
-
+ bool is_min;
+ bool is_unsigned;
+ Condition cond;
+ decode_minmax_reduction_opc(opc, &is_min, &is_unsigned, &cond);
BLOCK_COMMENT("neon_reduce_minmax_integral {");
if (bt == T_LONG) {
assert(vtmp == fnoreg, "should be");
assert(isQ, "should be");
umov(rscratch1, vsrc, D, 0);
cmp(isrc, rscratch1);
- csel(dst, isrc, rscratch1, is_min ? LT : GT);
+ csel(dst, isrc, rscratch1, cond);
umov(rscratch1, vsrc, D, 1);
cmp(dst, rscratch1);
- csel(dst, dst, rscratch1, is_min ? LT : GT);
+ csel(dst, dst, rscratch1, cond);
} else {
SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);
if (size == T2S) {
- is_min ? sminp(vtmp, size, vsrc, vsrc) : smaxp(vtmp, size, vsrc, vsrc);
+ // For T2S (2x32-bit elements), use pairwise instructions because
+ // uminv/umaxv/sminv/smaxv don't support arrangement 2S.
+ neon_minmaxp(is_unsigned, is_min, vtmp, size, vsrc, vsrc);
} else {
- is_min ? sminv(vtmp, size, vsrc) : smaxv(vtmp, size, vsrc);
+ // For other sizes, use reduction to scalar instructions.
+ neon_minmaxv(is_unsigned, is_min, vtmp, size, vsrc);
}
if (bt == T_INT) {
umov(dst, vtmp, S, 0);
+ } else if (is_unsigned) {
+ umov(dst, vtmp, elemType_to_regVariant(bt), 0);
} else {
smov(dst, vtmp, elemType_to_regVariant(bt), 0);
}
cmpw(dst, isrc);
- cselw(dst, dst, isrc, is_min ? LT : GT);
+ cselw(dst, dst, isrc, cond);
}
BLOCK_COMMENT("} neon_reduce_minmax_integral");
}
// Vector reduction for integral type with SVE instruction.
-// Supported operations are Add, And, Or, Xor, Max, Min.
+// Supported operations are Add, And, Or, Xor, Max, Min, UMax, UMin.
// rflags would be clobbered if opc is Op_MaxReductionV or Op_MinReductionV.
void C2_MacroAssembler::sve_reduce_integral(int opc, Register dst, BasicType bt, Register src1,
FloatRegister src2, PRegister pg, FloatRegister tmp) {
@@ -2075,35 +2167,27 @@ void C2_MacroAssembler::sve_reduce_integral(int opc, Register dst, BasicType bt,
}
break;
}
- case Op_MaxReductionV: {
- sve_smaxv(tmp, size, pg, src2);
- if (bt == T_INT || bt == T_LONG) {
+ case Op_MaxReductionV:
+ case Op_MinReductionV:
+ case Op_UMaxReductionV:
+ case Op_UMinReductionV: {
+ bool is_min;
+ bool is_unsigned;
+ Condition cond;
+ decode_minmax_reduction_opc(opc, &is_min, &is_unsigned, &cond);
+ sve_minmaxv(is_unsigned, is_min, tmp, size, pg, src2);
+ // Move result from vector to general register
+ if (is_unsigned || bt == T_INT || bt == T_LONG) {
umov(dst, tmp, size, 0);
} else {
smov(dst, tmp, size, 0);
}
if (bt == T_LONG) {
cmp(dst, src1);
- csel(dst, dst, src1, Assembler::GT);
+ csel(dst, dst, src1, cond);
} else {
cmpw(dst, src1);
- cselw(dst, dst, src1, Assembler::GT);
- }
- break;
- }
- case Op_MinReductionV: {
- sve_sminv(tmp, size, pg, src2);
- if (bt == T_INT || bt == T_LONG) {
- umov(dst, tmp, size, 0);
- } else {
- smov(dst, tmp, size, 0);
- }
- if (bt == T_LONG) {
- cmp(dst, src1);
- csel(dst, dst, src1, Assembler::LT);
- } else {
- cmpw(dst, src1);
- cselw(dst, dst, src1, Assembler::LT);
+ cselw(dst, dst, src1, cond);
}
break;
}
@@ -2379,17 +2463,17 @@ void C2_MacroAssembler::neon_rearrange_hsd(FloatRegister dst, FloatRegister src,
break;
case T_LONG:
case T_DOUBLE:
- // Load the iota indices for Long type. The indices are ordered by
- // type B/S/I/L/F/D, and the offset between two types is 16; Hence
- // the offset for L is 48.
- lea(rscratch1,
- ExternalAddress(StubRoutines::aarch64::vector_iota_indices() + 48));
- ldrq(tmp, rscratch1);
- // Check whether the input "shuffle" is the same with iota indices.
- // Return "src" if true, otherwise swap the two elements of "src".
- cm(EQ, dst, size2, shuffle, tmp);
- ext(tmp, size1, src, src, 8);
- bsl(dst, size1, src, tmp);
+ {
+ int idx = vector_iota_entry_index(T_LONG);
+ lea(rscratch1,
+ ExternalAddress(StubRoutines::aarch64::vector_iota_indices(idx)));
+ ldrq(tmp, rscratch1);
+ // Check whether the input "shuffle" is the same with iota indices.
+ // Return "src" if true, otherwise swap the two elements of "src".
+ cm(EQ, dst, size2, shuffle, tmp);
+ ext(tmp, size1, src, src, 8);
+ bsl(dst, size1, src, tmp);
+ }
break;
default:
assert(false, "unsupported element type");
@@ -2840,3 +2924,45 @@ void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src,
// dst = 00 87 00 65 00 43 00 21
sve_tbl(dst, size, src, dst);
}
+
+// Optimized SVE cpy (imm, zeroing) instruction.
+//
+// `movi; cpy(imm, merging)` and `cpy(imm, zeroing)` have the same
+// functionality, but test results show that `movi; cpy(imm, merging)` has
+// higher throughput on some microarchitectures. This would depend on
+// microarchitecture and so may vary between implementations.
+void C2_MacroAssembler::sve_cpy(FloatRegister dst, SIMD_RegVariant T,
+ PRegister pg, int imm8, bool isMerge) {
+ if (VM_Version::prefer_sve_merging_mode_cpy() && !isMerge) {
+ // Generates a NEON instruction `movi V.2d, #0`.
+ // On AArch64, Z and V registers alias in the low 128 bits, so V is
+ // the low 128 bits of Z. A write to V also clears all bits of
+ // Z above 128, so this `movi` instruction effectively zeroes the
+ // entire Z register. According to the Arm Software Optimization
+ // Guide, `movi` is zero latency.
+ movi(dst, T2D, 0);
+ isMerge = true;
+ }
+ Assembler::sve_cpy(dst, T, pg, imm8, isMerge);
+}
+
+int C2_MacroAssembler::vector_iota_entry_index(BasicType bt) {
+ // The vector iota entries array is ordered by type B/S/I/L/F/D, and
+ // the offset between two types is 16.
+ switch(bt) {
+ case T_BYTE:
+ return 0;
+ case T_SHORT:
+ return 1;
+ case T_INT:
+ return 2;
+ case T_LONG:
+ return 3;
+ case T_FLOAT:
+ return 4;
+ case T_DOUBLE:
+ return 5;
+ default:
+ ShouldNotReachHere();
+ }
+}
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index 412f0f37e9e..f96d3ffb863 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,37 @@
void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm,
enum shift_kind kind = Assembler::LSL, unsigned shift = 0);
+ // Helper functions for min/max reduction operations
+
+ void decode_minmax_reduction_opc(int opc, bool* is_min, bool* is_unsigned, Condition* cond);
+
+ void neon_minmaxp(bool is_unsigned, bool is_min, FloatRegister dst,
+ SIMD_Arrangement size, FloatRegister src1, FloatRegister src2) {
+ auto m = is_unsigned ? (is_min ? &Assembler::uminp : &Assembler::umaxp)
+ : (is_min ? &Assembler::sminp : &Assembler::smaxp);
+ (this->*m)(dst, size, src1, src2);
+ }
+
+ // Typedefs used to disambiguate overloaded member functions.
+ typedef void (Assembler::*neon_reduction2)
+ (FloatRegister, SIMD_Arrangement, FloatRegister);
+
+ void neon_minmaxv(bool is_unsigned, bool is_min, FloatRegister dst,
+ SIMD_Arrangement size, FloatRegister src) {
+ auto m = is_unsigned ? (is_min ? (neon_reduction2)&Assembler::uminv
+ : (neon_reduction2)&Assembler::umaxv)
+ : (is_min ? &Assembler::sminv
+ : &Assembler::smaxv);
+ (this->*m)(dst, size, src);
+ }
+
+ void sve_minmaxv(bool is_unsigned, bool is_min, FloatRegister dst,
+ SIMD_RegVariant size, PRegister pg, FloatRegister src) {
+ auto m = is_unsigned ? (is_min ? &Assembler::sve_uminv : &Assembler::sve_umaxv)
+ : (is_min ? &Assembler::sve_sminv : &Assembler::sve_smaxv);
+ (this->*m)(dst, size, pg, src);
+ }
+
void select_from_two_vectors_neon(FloatRegister dst, FloatRegister src1,
FloatRegister src2, FloatRegister index,
FloatRegister tmp, unsigned vector_length_in_bytes);
@@ -44,6 +75,8 @@
unsigned vector_length_in_bytes);
public:
+ using Assembler::sve_cpy;
+
// jdk.internal.util.ArraysSupport.vectorizedHashCode
address arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0,
FloatRegister vdata1, FloatRegister vdata2, FloatRegister vdata3,
@@ -144,6 +177,9 @@
FloatRegister fsrc, FloatRegister vsrc,
unsigned vector_length_in_bytes, FloatRegister vtmp);
+ void neon_reduce_add_fp16(FloatRegister dst, FloatRegister fsrc, FloatRegister vsrc,
+ unsigned vector_length_in_bytes, FloatRegister vtmp);
+
void neon_reduce_logical(int opc, Register dst, BasicType bt, Register isrc,
FloatRegister vsrc, unsigned vector_length_in_bytes);
@@ -213,4 +249,8 @@
void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
int vector_length_in_bytes);
+
+ void sve_cpy(FloatRegister dst, SIMD_RegVariant T, PRegister pg, int imm8,
+ bool isMerge);
+ int vector_iota_entry_index(BasicType bt);
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
index a0dea3643a1..192461d1a61 100644
--- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
@@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, InteriorEntryAlignment, 16);
-define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
// InitialCodeCacheSize derived from specjbb2000 run.
@@ -75,9 +74,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheMinBlockLength, 6);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed.
#endif // CPU_AARCH64_C2_GLOBALS_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
index 6fe3315014b..640cd495383 100644
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
@@ -89,16 +89,21 @@ void CompiledDirectCall::set_to_interpreted(const methodHandle& callee, address
NativeMovConstReg* method_holder
= nativeMovConstReg_at(stub + NativeInstruction::instruction_size);
+ // In AOT "production" run we have mixture of AOTed and normal JITed code.
+ // Static call stub in AOTed nmethod always has far jump.
+ // Normal JITed nmethod may have short or far jump depending on distance.
+ // Determine actual jump instruction we have in code.
+ address next_instr = method_holder->next_instruction_address();
+ bool is_general_jump = nativeInstruction_at(next_instr)->is_general_jump();
+
#ifdef ASSERT
- NativeJump* jump = MacroAssembler::codestub_branch_needs_far_jump()
- ? nativeGeneralJump_at(method_holder->next_instruction_address())
- : nativeJump_at(method_holder->next_instruction_address());
+ NativeJump* jump = is_general_jump ? nativeGeneralJump_at(next_instr) : nativeJump_at(next_instr);
verify_mt_safe(callee, entry, method_holder, jump);
#endif
// Update stub.
method_holder->set_data((intptr_t)callee());
- MacroAssembler::pd_patch_instruction(method_holder->next_instruction_address(), entry);
+ MacroAssembler::pd_patch_instruction(next_instr, entry);
ICache::invalidate_range(stub, to_interp_stub_size());
// Update jump to call.
set_destination_mt_safe(stub);
diff --git a/src/hotspot/cpu/aarch64/downcallLinker_aarch64.cpp b/src/hotspot/cpu/aarch64/downcallLinker_aarch64.cpp
index 65d448f908c..130d2949800 100644
--- a/src/hotspot/cpu/aarch64/downcallLinker_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/downcallLinker_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -146,10 +146,10 @@ void DowncallLinker::StubGenerator::generate() {
bool should_save_return_value = !_needs_return_buffer;
RegSpiller out_reg_spiller(_output_registers);
- int spill_offset = -1;
+ int out_spill_offset = -1;
if (should_save_return_value) {
- spill_offset = 0;
+ out_spill_offset = 0;
// spill area can be shared with shadow space and out args,
// since they are only used before the call,
// and spill area is only used after.
@@ -174,6 +174,9 @@ void DowncallLinker::StubGenerator::generate() {
// FP-> | |
// |---------------------| = frame_bottom_offset = frame_size
// | (optional) |
+ // | in_reg_spiller area |
+ // |---------------------|
+ // | (optional) |
// | capture state buf |
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
// | (optional) |
@@ -187,6 +190,19 @@ void DowncallLinker::StubGenerator::generate() {
GrowableArray out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
+ // Need to spill for state capturing runtime call.
+ // The area spilled into is distinct from the capture state buffer.
+ RegSpiller in_reg_spiller(out_regs);
+ int in_spill_offset = -1;
+ if (_captured_state_mask != 0) {
+ // The spill area cannot be shared with the out_spill since
+ // spilling needs to happen before the call. Allocate a new
+ // region in the stack for this spill space.
+ in_spill_offset = allocated_frame_size;
+ allocated_frame_size += in_reg_spiller.spill_size_bytes();
+ }
+
+
#ifndef PRODUCT
LogTarget(Trace, foreign, downcall) lt;
if (lt.is_enabled()) {
@@ -228,6 +244,20 @@ void DowncallLinker::StubGenerator::generate() {
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
__ block_comment("} argument shuffle");
+ if (_captured_state_mask != 0) {
+ assert(in_spill_offset != -1, "must be");
+ __ block_comment("{ load initial thread local");
+ in_reg_spiller.generate_spill(_masm, in_spill_offset);
+
+ // Copy the contents of the capture state buffer into thread local
+ __ ldr(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
+ __ movw(c_rarg1, _captured_state_mask);
+ __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre), tmp1);
+
+ in_reg_spiller.generate_fill(_masm, in_spill_offset);
+ __ block_comment("} load initial thread local");
+ }
+
__ blr(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
// this call is assumed not to have killed rthread
@@ -254,15 +284,15 @@ void DowncallLinker::StubGenerator::generate() {
__ block_comment("{ save thread local");
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ ldr(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
__ movw(c_rarg1, _captured_state_mask);
- __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state), tmp1);
+ __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post), tmp1);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ block_comment("} save thread local");
@@ -321,7 +351,7 @@ void DowncallLinker::StubGenerator::generate() {
if (should_save_return_value) {
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ mov(c_rarg0, rthread);
@@ -330,7 +360,7 @@ void DowncallLinker::StubGenerator::generate() {
__ blr(tmp1);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ b(L_after_safepoint_poll);
@@ -342,13 +372,13 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_reguard);
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), tmp1);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ b(L_after_reguard);
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp
index cb53d8663ad..748ab0e0e2b 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -245,8 +245,8 @@ inline bool frame::equal(frame other) const {
// Return unique id for this frame. The id must have a value where we can distinguish
// identity and younger/older relationship. null represents an invalid (incomparable)
-// frame.
-inline intptr_t* frame::id(void) const { return unextended_sp(); }
+// frame. Should not be called for heap frames.
+inline intptr_t* frame::id(void) const { return real_fp(); }
// Return true if the frame is older (less recent activation) than the frame represented by id
inline bool frame::is_older(intptr_t* id) const { assert(this->id() != nullptr && id != nullptr, "null frame id");
@@ -412,6 +412,9 @@ inline frame frame::sender(RegisterMap* map) const {
StackWatermarkSet::on_iteration(map->thread(), result);
}
+ // Calling frame::id() is currently not supported for heap frames.
+ assert(result._on_heap || this->_on_heap || result.is_older(this->id()), "Must be");
+
return result;
}
diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
index d7884c27a2c..68291720208 100644
--- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
@@ -23,6 +23,7 @@
*/
#include "asm/macroAssembler.inline.hpp"
+#include "code/aotCodeCache.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
@@ -243,9 +244,25 @@ static void generate_post_barrier(MacroAssembler* masm,
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);
// Does store cross heap regions?
- __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
- __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
- __ cbz(tmp1, done);
+ #if INCLUDE_CDS
+ // AOT code needs to load the barrier grain shift from the aot
+ // runtime constants area in the code cache otherwise we can compile
+ // it as an immediate operand
+ if (AOTCodeCache::is_on_for_dump()) {
+ address grain_shift_address = (address)AOTRuntimeConstants::grain_shift_address();
+ __ eor(tmp1, store_addr, new_val);
+ __ lea(tmp2, ExternalAddress(grain_shift_address));
+ __ ldrb(tmp2, tmp2);
+ __ lsrv(tmp1, tmp1, tmp2);
+ __ cbz(tmp1, done);
+ } else
+#endif
+ {
+ __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ cbz(tmp1, done);
+ }
+
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ cbz(new_val, done);
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
index 021af3e5698..2a78d688097 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -441,6 +441,11 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na
return opto_reg;
}
+void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Load the oop from the weak handle.
+ __ ldr(obj, Address(obj));
+}
+
#undef __
#define __ _masm->
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
index e69be999f00..c2581b2f962 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -135,6 +135,7 @@ public:
OptoReg::Name opto_reg);
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
#endif // COMPILER2
};
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
index 4d5ca01b6b4..3d5261c31d1 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
@@ -209,6 +209,10 @@ void BarrierSetNMethod::set_guard_value(nmethod* nm, int value, int bit_mask) {
bs_asm->increment_patching_epoch();
}
+ // Enable WXWrite: the function is called directly from nmethod_entry_barrier
+ // stub.
+ MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, Thread::current()));
+
NativeNMethodBarrier barrier(nm);
barrier.set_value(value, bit_mask);
}
diff --git a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp
index 0bfc320179d..7ce4e0f8aed 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -56,8 +56,10 @@ void CardTableBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet d
}
}
-void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
-
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2) {
+ precond(tmp1 != noreg);
+ precond(tmp2 != noreg);
+ assert_different_registers(obj, tmp1, tmp2);
BarrierSet* bs = BarrierSet::barrier_set();
assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
@@ -65,16 +67,16 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob
assert(CardTable::dirty_card_val() == 0, "must be");
- __ load_byte_map_base(rscratch1);
+ __ load_byte_map_base(tmp1);
if (UseCondCardMark) {
Label L_already_dirty;
- __ ldrb(rscratch2, Address(obj, rscratch1));
- __ cbz(rscratch2, L_already_dirty);
- __ strb(zr, Address(obj, rscratch1));
+ __ ldrb(tmp2, Address(obj, tmp1));
+ __ cbz(tmp2, L_already_dirty);
+ __ strb(zr, Address(obj, tmp1));
__ bind(L_already_dirty);
} else {
- __ strb(zr, Address(obj, rscratch1));
+ __ strb(zr, Address(obj, tmp1));
}
}
@@ -112,10 +114,10 @@ void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorS
if (needs_post_barrier) {
// flatten object address if needed
if (!precise || (dst.index() == noreg && dst.offset() == 0)) {
- store_check(masm, dst.base(), dst);
+ store_check(masm, dst.base(), tmp1, tmp2);
} else {
__ lea(tmp3, dst);
- store_check(masm, tmp3, dst);
+ store_check(masm, tmp3, tmp1, tmp2);
}
}
}
diff --git a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp
index 07dd8eb5565..07016381f78 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,7 @@ protected:
virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
- void store_check(MacroAssembler* masm, Register obj, Address dst);
+ void store_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2);
};
#endif // CPU_AARCH64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index 9a035d9f40e..2f7707227b4 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -85,26 +86,16 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec
}
}
-void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call) {
- if (ShenandoahSATBBarrier) {
- satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, rscratch1, tosca_live, expand_call);
- }
-}
+void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ bool tosca_live,
+ bool expand_call) {
+ assert(ShenandoahSATBBarrier, "Should be checked by caller");
-void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp1,
- Register tmp2,
- bool tosca_live,
- bool expand_call) {
// If expand_call is true then we expand the call_VM_leaf macro
// directly to skip generating the check by
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
@@ -358,20 +349,20 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
__ enter(/*strip_ret_addr*/true);
__ push_call_clobbered_registers();
- satb_write_barrier_pre(masm /* masm */,
- noreg /* obj */,
- dst /* pre_val */,
- rthread /* thread */,
- tmp1 /* tmp1 */,
- tmp2 /* tmp2 */,
- true /* tosca_live */,
- true /* expand_call */);
+ satb_barrier(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ rthread /* thread */,
+ tmp1 /* tmp1 */,
+ tmp2 /* tmp2 */,
+ true /* tosca_live */,
+ true /* expand_call */);
__ pop_call_clobbered_registers();
__ leave();
}
}
-void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj) {
+void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
__ lsr(obj, obj, CardTable::card_shift());
@@ -394,13 +385,13 @@ void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register o
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
- bool on_oop = is_reference_type(type);
- if (!on_oop) {
+ // 1: non-reference types require no barriers
+ if (!is_reference_type(type)) {
BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
return;
}
- // flatten object address if needed
+ // Flatten object address right away for simplicity: likely needed by barriers
if (dst.index() == noreg && dst.offset() == 0) {
if (dst.base() != tmp3) {
__ mov(tmp3, dst.base());
@@ -409,20 +400,26 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet
__ lea(tmp3, dst);
}
- shenandoah_write_barrier_pre(masm,
- tmp3 /* obj */,
- tmp2 /* pre_val */,
- rthread /* thread */,
- tmp1 /* tmp */,
- val != noreg /* tosca_live */,
- false /* expand_call */);
+ bool storing_non_null = (val != noreg);
+ // 2: pre-barrier: SATB needs the previous value
+ if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
+ satb_barrier(masm,
+ tmp3 /* obj */,
+ tmp2 /* pre_val */,
+ rthread /* thread */,
+ tmp1 /* tmp */,
+ rscratch1 /* tmp2 */,
+ storing_non_null /* tosca_live */,
+ false /* expand_call */);
+ }
+
+ // Store!
BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
- bool in_heap = (decorators & IN_HEAP) != 0;
- bool needs_post_barrier = (val != noreg) && in_heap && ShenandoahCardBarrier;
- if (needs_post_barrier) {
- store_check(masm, tmp3);
+ // 3: post-barrier: card barrier needs store address
+ if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
+ card_barrier(masm, tmp3);
}
}
@@ -446,6 +443,30 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
__ bind(done);
}
+#ifdef COMPILER2
+void ShenandoahBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj,
+ Register tmp, Label& slow_path) {
+ assert_different_registers(obj, tmp);
+
+ Label done;
+
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
+
+ // Check if the reference is null, and if it is, take the fast path.
+ __ cbz(obj, done);
+
+ Address gc_state(rthread, ShenandoahThreadLocalData::gc_state_offset());
+ __ lea(tmp, gc_state);
+ __ ldrb(tmp, __ legitimize_address(gc_state, 1, tmp));
+
+ // Check if the heap is under weak-reference/roots processing, in
+ // which case we need to take the slow path.
+ __ tbnz(tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS, slow_path);
+ __ bind(done);
+}
+#endif
+
// Special Shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation. The service is more complex than a
// traditional CAS operation because the CAS operation is intended to
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index c0e708e1292..d5d5ce8950e 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -40,23 +41,16 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- void satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp1,
- Register tmp2,
- bool tosca_live,
- bool expand_call);
- void shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call);
+ void satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ bool tosca_live,
+ bool expand_call);
- void store_check(MacroAssembler* masm, Register obj);
+ void card_barrier(MacroAssembler* masm, Register obj);
void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
@@ -86,6 +80,9 @@ public:
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+#ifdef COMPILER2
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
+#endif
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
bool acquire, bool release, bool is_cae, Register result);
};
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
index 07a2d6fbfa0..f0885fee93d 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -879,7 +879,9 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
ShouldNotReachHere();
}
- ICache::invalidate_word((address)patch_addr);
+ if (!UseSingleICacheInvalidation) {
+ ICache::invalidate_word((address)patch_addr);
+ }
}
#ifdef COMPILER1
@@ -1326,6 +1328,23 @@ void ZStoreBarrierStubC2Aarch64::emit_code(MacroAssembler& masm) {
register_stub(this);
}
+#undef __
+#define __ masm->
+
+void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
+
+ // Check if the oop is bad, in which case we need to take the slow path.
+ __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadBeforeMov);
+ __ movzw(tmp, barrier_Relocation::unpatched);
+ __ tst(obj, tmp);
+ __ br(Assembler::NE, slow_path);
+
+ // Oop is okay, so we uncolor it.
+ __ lsr(obj, obj, ZPointerLoadShift);
+}
+
#undef __
#endif // COMPILER2
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
index 487970ab0c5..fbbc5c1b517 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -191,6 +191,7 @@ public:
ZLoadBarrierStubC2* stub) const;
void generate_c2_store_barrier_stub(MacroAssembler* masm,
ZStoreBarrierStubC2* stub) const;
+ void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
#endif // COMPILER2
void check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error);
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index 8e520314c8b..dfeba73bede 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -39,7 +39,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls
define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
define_pd_global(size_t, CodeCacheSegmentSize, 64);
-define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(uint, CodeEntryAlignment, 64);
define_pd_global(intx, OptoLoopAlignment, 16);
#define DEFAULT_STACK_YELLOW_PAGES (2)
@@ -95,7 +95,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use simplest and shortest implementation for array equals") \
product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
"Use SIMD instructions for left/right shift of BigInteger") \
- product(bool, UseSIMDForSHA3Intrinsic, true, \
+ product(bool, UseSIMDForSHA3Intrinsic, false, \
"Use SIMD SHA3 instructions for SHA3 intrinsic") \
product(bool, AvoidUnalignedAccesses, false, \
"Avoid generating unaligned memory accesses") \
@@ -115,18 +115,26 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Value -1 means off.") \
range(-1, 4096) \
product(ccstr, OnSpinWaitInst, "yield", DIAGNOSTIC, \
- "The instruction to use to implement " \
- "java.lang.Thread.onSpinWait()." \
- "Valid values are: none, nop, isb, yield, sb.") \
+ "The instruction to use for java.lang.Thread.onSpinWait(). " \
+ "Valid values are: none, nop, isb, yield, sb, wfet.") \
constraint(OnSpinWaitInstNameConstraintFunc, AtParse) \
product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \
- "The number of OnSpinWaitInst instructions to generate." \
- "It cannot be used with OnSpinWaitInst=none.") \
+ "The number of OnSpinWaitInst instructions to generate. " \
+ "It cannot be used with OnSpinWaitInst=none. " \
+ "For OnSpinWaitInst=wfet it must be 1.") \
range(1, 99) \
+ product(uint, OnSpinWaitDelay, 40, DIAGNOSTIC, \
+ "The minimum delay (in nanoseconds) of the OnSpinWait loop. " \
+ "It can only be used with -XX:OnSpinWaitInst=wfet.") \
+ range(1, 1000) \
product(ccstr, UseBranchProtection, "none", \
"Branch Protection to use: none, standard, pac-ret") \
product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \
"Always merge DMB instructions in code emission") \
+ product(bool, NeoverseN1ICacheErratumMitigation, false, DIAGNOSTIC, \
+ "Enable workaround for Neoverse N1 erratum 1542419") \
+ product(bool, UseSingleICacheInvalidation, false, DIAGNOSTIC, \
+ "Defer multiple ICache invalidation to single invalidation") \
// end of ARCH_FLAGS
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
index 957c2aee1c1..980fedb406d 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -240,15 +240,14 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset(
// Rsub_klass: subklass
//
// Kills:
-// r2, r5
+// r2
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
Label& ok_is_subtype) {
assert(Rsub_klass != r0, "r0 holds superklass");
assert(Rsub_klass != r2, "r2 holds 2ndary super array length");
- assert(Rsub_klass != r5, "r5 holds 2ndary super array scan ptr");
// Profile the not-null value's klass.
- profile_typecheck(r2, Rsub_klass, r5); // blows r2, reloads r5
+ profile_typecheck(r2, Rsub_klass); // blows r2
// Do the check.
check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2
@@ -990,27 +989,15 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
- Register mdp,
- Register reg2,
- bool receiver_can_be_null) {
+ Register mdp) {
if (ProfileInterpreter) {
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
- Label skip_receiver_profile;
- if (receiver_can_be_null) {
- Label not_null;
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- b(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
- record_klass_in_profile(receiver, mdp, reg2);
- bind(skip_receiver_profile);
+ profile_receiver_type(receiver, mdp, 0);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
@@ -1018,131 +1005,6 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
}
}
-// This routine creates a state machine for updating the multi-row
-// type profile at a virtual call site (or other type-sensitive bytecode).
-// The machine visits each row (of receiver/count) until the receiver type
-// is found, or until it runs out of rows. At the same time, it remembers
-// the location of the first empty row. (An empty row records null for its
-// receiver, and can be allocated for a newly-observed receiver type.)
-// Because there are two degrees of freedom in the state, a simple linear
-// search will not work; it must be a decision tree. Hence this helper
-// function is recursive, to generate the required tree structured code.
-// It's the interpreter, so we are trading off code space for speed.
-// See below for example code.
-void InterpreterMacroAssembler::record_klass_in_profile_helper(
- Register receiver, Register mdp,
- Register reg2, int start_row,
- Label& done) {
- if (TypeProfileWidth == 0) {
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- } else {
- record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
- &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset);
- }
-}
-
-void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
- Register reg2, int start_row, Label& done, int total_rows,
- OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn) {
- int last_row = total_rows - 1;
- assert(start_row <= last_row, "must be work left to do");
- // Test this row for both the item and for null.
- // Take any of three different outcomes:
- // 1. found item => increment count and goto done
- // 2. found null => keep looking for case 1, maybe allocate this cell
- // 3. found something else => keep looking for cases 1 and 2
- // Case 3 is handled by a recursive call.
- for (int row = start_row; row <= last_row; row++) {
- Label next_test;
- bool test_for_null_also = (row == start_row);
-
- // See if the item is item[n].
- int item_offset = in_bytes(item_offset_fn(row));
- test_mdp_data_at(mdp, item_offset, item,
- (test_for_null_also ? reg2 : noreg),
- next_test);
- // (Reg2 now contains the item from the CallData.)
-
- // The item is item[n]. Increment count[n].
- int count_offset = in_bytes(item_count_offset_fn(row));
- increment_mdp_data_at(mdp, count_offset);
- b(done);
- bind(next_test);
-
- if (test_for_null_also) {
- Label found_null;
- // Failed the equality check on item[n]... Test for null.
- if (start_row == last_row) {
- // The only thing left to do is handle the null case.
- cbz(reg2, found_null);
- // Item did not match any saved item and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- b(done);
- bind(found_null);
- break;
- }
- // Since null is rare, make it be the branch-taken case.
- cbz(reg2, found_null);
-
- // Put all the "Case 3" tests here.
- record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
- item_offset_fn, item_count_offset_fn);
-
- // Found a null. Keep searching for a matching item,
- // but remember that this is an empty (unused) slot.
- bind(found_null);
- }
- }
-
- // In the fall-through case, we found no matching item, but we
- // observed the item[start_row] is null.
-
- // Fill in the item field and increment the count.
- int item_offset = in_bytes(item_offset_fn(start_row));
- set_mdp_data_at(mdp, item_offset, item);
- int count_offset = in_bytes(item_count_offset_fn(start_row));
- mov(reg2, DataLayout::counter_increment);
- set_mdp_data_at(mdp, count_offset, reg2);
- if (start_row > 0) {
- b(done);
- }
-}
-
-// Example state machine code for three profile rows:
-// // main copy of decision tree, rooted at row[1]
-// if (row[0].rec == rec) { row[0].incr(); goto done; }
-// if (row[0].rec != nullptr) {
-// // inner copy of decision tree, rooted at row[1]
-// if (row[1].rec == rec) { row[1].incr(); goto done; }
-// if (row[1].rec != nullptr) {
-// // degenerate decision tree, rooted at row[2]
-// if (row[2].rec == rec) { row[2].incr(); goto done; }
-// if (row[2].rec != nullptr) { count.incr(); goto done; } // overflow
-// row[2].init(rec); goto done;
-// } else {
-// // remember row[1] is empty
-// if (row[2].rec == rec) { row[2].incr(); goto done; }
-// row[1].init(rec); goto done;
-// }
-// } else {
-// // remember row[0] is empty
-// if (row[1].rec == rec) { row[1].incr(); goto done; }
-// if (row[2].rec == rec) { row[2].incr(); goto done; }
-// row[0].init(rec); goto done;
-// }
-// done:
-
-void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
- Register mdp, Register reg2) {
- assert(ProfileInterpreter, "must be profiling");
- Label done;
-
- record_klass_in_profile_helper(receiver, mdp, reg2, 0, done);
-
- bind (done);
-}
-
void InterpreterMacroAssembler::profile_ret(Register return_bci,
Register mdp) {
if (ProfileInterpreter) {
@@ -1200,7 +1062,7 @@ void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
}
}
-void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass) {
if (ProfileInterpreter) {
Label profile_continue;
@@ -1213,7 +1075,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass,
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
// Record the object type.
- record_klass_in_profile(klass, mdp, reg2);
+ profile_receiver_type(klass, mdp, 0);
}
update_mdp_by_constant(mdp, mdp_delta);
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
index 2b230a3b73e..9a074f1ce69 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -273,15 +273,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
Register test_value_out,
Label& not_equal_continue);
- void record_klass_in_profile(Register receiver, Register mdp,
- Register reg2);
- void record_klass_in_profile_helper(Register receiver, Register mdp,
- Register reg2, int start_row,
- Label& done);
- void record_item_in_profile_helper(Register item, Register mdp,
- Register reg2, int start_row, Label& done, int total_rows,
- OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn);
-
void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
void update_mdp_by_constant(Register mdp_in, int constant);
@@ -294,12 +285,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_not_taken_branch(Register mdp);
void profile_call(Register mdp);
void profile_final_call(Register mdp);
- void profile_virtual_call(Register receiver, Register mdp,
- Register scratch2,
- bool receiver_can_be_null = false);
+ void profile_virtual_call(Register receiver, Register mdp);
void profile_ret(Register return_bci, Register mdp);
void profile_null_seen(Register mdp);
- void profile_typecheck(Register mdp, Register klass, Register scratch);
+ void profile_typecheck(Register mdp, Register klass);
void profile_typecheck_failed(Register mdp);
void profile_switch_default(Register mdp);
void profile_switch_case(Register index_in_scratch, Register mdp,
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 27428a5c558..7bec0a3c0ca 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -55,6 +55,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/integerCast.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
@@ -406,7 +407,6 @@ public:
offset <<= shift;
uint64_t target_page = ((uint64_t)insn_addr) + offset;
target_page &= ((uint64_t)-1) << shift;
- uint32_t insn2 = insn_at(insn_addr, 1);
target = address(target_page);
precond(inner != nullptr);
inner(insn_addr, target);
@@ -473,6 +473,7 @@ address MacroAssembler::target_addr_for_insn(address insn_addr) {
// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address insn_addr, address target) {
+ MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
return RelocActions::run(insn_addr, target);
}
@@ -481,6 +482,8 @@ int MacroAssembler::patch_oop(address insn_addr, address o) {
unsigned insn = *(unsigned*)insn_addr;
assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+ MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
+
// OOPs are either narrow (32 bits) or wide (48 bits). We encode
// narrow OOPs by setting the upper 16 bits in the first
// instruction.
@@ -510,18 +513,13 @@ int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+ MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
+
Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
return 2 * NativeInstruction::instruction_size;
}
-address MacroAssembler::target_addr_for_insn_or_null(address insn_addr) {
- if (NativeInstruction::is_ldrw_to_zr(insn_addr)) {
- return nullptr;
- }
- return MacroAssembler::target_addr_for_insn(insn_addr);
-}
-
void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp) {
ldr(tmp, Address(rthread, JavaThread::polling_word_offset()));
if (at_return) {
@@ -765,7 +763,7 @@ void MacroAssembler::call_VM_base(Register oop_result,
assert(java_thread == rthread, "unexpected register");
#ifdef ASSERT
// TraceBytecodes does not use r12 but saves it over the call, so don't verify
- // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");
+ // if (!TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");
#endif // ASSERT
assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
@@ -955,7 +953,10 @@ void MacroAssembler::emit_static_call_stub() {
}
int MacroAssembler::static_call_stub_size() {
- if (!codestub_branch_needs_far_jump()) {
+  // During an AOT production run, AOT and JIT compiled code
+ // are used at the same time. We need this size
+ // to be the same for both types of code.
+ if (!codestub_branch_needs_far_jump() && !AOTCodeCache::is_on_for_use()) {
// isb; movk; movz; movz; b
return 5 * NativeInstruction::instruction_size;
}
@@ -1005,14 +1006,10 @@ int MacroAssembler::ic_check(int end_alignment) {
load_narrow_klass_compact(tmp1, receiver);
ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
cmpw(tmp1, tmp2);
- } else if (UseCompressedClassPointers) {
+ } else {
ldrw(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
ldrw(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
cmpw(tmp1, tmp2);
- } else {
- ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
- ldr(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
- cmp(tmp1, tmp2);
}
Label dont;
@@ -1955,9 +1952,7 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
const Register
r_array_base = temp1,
- r_array_length = temp2,
- r_array_index = noreg, // unused
- r_bitmap = noreg; // unused
+ r_array_length = temp2;
BLOCK_COMMENT("verify_secondary_supers_table {");
@@ -2118,6 +2113,161 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
}
}
+// Handle the receiver type profile update given the "recv" klass.
+//
+// Normally updates the ReceiverData (RD) that starts at "mdp" + "mdp_offset".
+// If there are no matching or claimable receiver entries in RD, updates
+// the polymorphic counter.
+//
+// This code is expected to run in either the interpreter or JIT-ed code, without
+// extra synchronization. For safety, receiver cells are claimed atomically, which
+// avoids grossly misrepresenting the profiles under concurrent updates. For speed,
+// counter updates are not atomic.
+//
+void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_offset) {
+ assert_different_registers(recv, mdp, rscratch1, rscratch2);
+
+ int base_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(0));
+ int end_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(ReceiverTypeData::row_limit()));
+ int poly_count_offset = in_bytes(CounterData::count_offset());
+ int receiver_step = in_bytes(ReceiverTypeData::receiver_offset(1)) - base_receiver_offset;
+ int receiver_to_count_step = in_bytes(ReceiverTypeData::receiver_count_offset(0)) - base_receiver_offset;
+
+ // Adjust for MDP offsets.
+ base_receiver_offset += mdp_offset;
+ end_receiver_offset += mdp_offset;
+ poly_count_offset += mdp_offset;
+
+#ifdef ASSERT
+ // We are about to walk the MDO slots without asking for offsets.
+ // Check that our math hits all the right spots.
+ for (uint c = 0; c < ReceiverTypeData::row_limit(); c++) {
+ int real_recv_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_offset(c));
+ int real_count_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_count_offset(c));
+ int offset = base_receiver_offset + receiver_step*c;
+ int count_offset = offset + receiver_to_count_step;
+ assert(offset == real_recv_offset, "receiver slot math");
+ assert(count_offset == real_count_offset, "receiver count math");
+ }
+ int real_poly_count_offset = mdp_offset + in_bytes(CounterData::count_offset());
+ assert(poly_count_offset == real_poly_count_offset, "poly counter math");
+#endif
+
+ // Corner case: no profile table. Increment poly counter and exit.
+ if (ReceiverTypeData::row_limit() == 0) {
+ increment(Address(mdp, poly_count_offset), DataLayout::counter_increment);
+ return;
+ }
+
+ Register offset = rscratch2;
+
+ Label L_loop_search_receiver, L_loop_search_empty;
+ Label L_restart, L_found_recv, L_found_empty, L_polymorphic, L_count_update;
+
+ // The code here recognizes three major cases:
+ // A. Fastest: receiver found in the table
+ // B. Fast: no receiver in the table, and the table is full
+ // C. Slow: no receiver in the table, free slots in the table
+ //
+ // The case A performance is most important, as perfectly-behaved code would end up
+ // there, especially with larger TypeProfileWidth. The case B performance is
+ // important as well, this is where bulk of code would land for normally megamorphic
+ // cases. The case C performance is not essential, its job is to deal with installation
+ // races, we optimize for code density instead. Case C needs to make sure that receiver
+ // rows are only claimed once. This makes sure we never overwrite a row for another
+ // receiver and never duplicate the receivers in the list, making profile type-accurate.
+ //
+ // It is very tempting to handle these cases in a single loop, and claim the first slot
+ // without checking the rest of the table. But, profiling code should tolerate free slots
+ // in the table, as class unloading can clear them. After such cleanup, the receiver
+// we need might be _after_ the free slot. Therefore, we need to let at least a full scan
+// complete before trying to install new slots. Splitting the code into several tight
+ // loops also helpfully optimizes for cases A and B.
+ //
+ // This code is effectively:
+ //
+ // restart:
+ // // Fastest: receiver is already installed
+ // for (i = 0; i < receiver_count(); i++) {
+ // if (receiver(i) == recv) goto found_recv(i);
+ // }
+ //
+ // // Fast: no receiver, but profile is full
+ // for (i = 0; i < receiver_count(); i++) {
+ // if (receiver(i) == null) goto found_null(i);
+ // }
+ // goto polymorphic
+ //
+ // // Slow: try to install receiver
+ // found_null(i):
+ // CAS(&receiver(i), null, recv);
+ // goto restart
+ //
+ // polymorphic:
+ // count++;
+ // return
+ //
+ // found_recv(i):
+ // *receiver_count(i)++
+ //
+
+ bind(L_restart);
+
+ // Fastest: receiver is already installed
+ mov(offset, base_receiver_offset);
+ bind(L_loop_search_receiver);
+ ldr(rscratch1, Address(mdp, offset));
+ cmp(rscratch1, recv);
+ br(Assembler::EQ, L_found_recv);
+ add(offset, offset, receiver_step);
+ sub(rscratch1, offset, end_receiver_offset);
+ cbnz(rscratch1, L_loop_search_receiver);
+
+ // Fast: no receiver, but profile is full
+ mov(offset, base_receiver_offset);
+ bind(L_loop_search_empty);
+ ldr(rscratch1, Address(mdp, offset));
+ cbz(rscratch1, L_found_empty);
+ add(offset, offset, receiver_step);
+ sub(rscratch1, offset, end_receiver_offset);
+ cbnz(rscratch1, L_loop_search_empty);
+ b(L_polymorphic);
+
+ // Slow: try to install receiver
+ bind(L_found_empty);
+
+ // Atomically swing receiver slot: null -> recv.
+ //
+ // The update uses CAS, which clobbers rscratch1. Therefore, rscratch2
+ // is used to hold the destination address. This is safe because the
+ // offset is no longer needed after the address is computed.
+
+ lea(rscratch2, Address(mdp, offset));
+ cmpxchg(/*addr*/ rscratch2, /*expected*/ zr, /*new*/ recv, Assembler::xword,
+ /*acquire*/ false, /*release*/ false, /*weak*/ true, noreg);
+
+ // CAS success means the slot now has the receiver we want. CAS failure means
+ // something had claimed the slot concurrently: it can be the same receiver we want,
+ // or something else. Since this is a slow path, we can optimize for code density,
+ // and just restart the search from the beginning.
+ b(L_restart);
+
+ // Counter updates:
+
+ // Increment polymorphic counter instead of receiver slot.
+ bind(L_polymorphic);
+ mov(offset, poly_count_offset);
+ b(L_count_update);
+
+ // Found a receiver, convert its slot offset to corresponding count offset.
+ bind(L_found_recv);
+ add(offset, offset, receiver_to_count_step);
+
+ bind(L_count_update);
+ increment(Address(mdp, offset), DataLayout::counter_increment);
+}
+
+
void MacroAssembler::call_VM_leaf_base(address entry_point,
int number_of_arguments,
Label *retaddr) {
@@ -2767,7 +2917,11 @@ void MacroAssembler::increment(Address dst, int value)
// Push lots of registers in the bit set supplied. Don't push sp.
// Return the number of words pushed
-int MacroAssembler::push(unsigned int bitset, Register stack) {
+int MacroAssembler::push(RegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int words_pushed = 0;
// Scan bitset to accumulate register pairs
@@ -2797,7 +2951,11 @@ int MacroAssembler::push(unsigned int bitset, Register stack) {
return count;
}
-int MacroAssembler::pop(unsigned int bitset, Register stack) {
+int MacroAssembler::pop(RegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int words_pushed = 0;
// Scan bitset to accumulate register pairs
@@ -2829,7 +2987,11 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
// Push lots of registers in the bit set supplied. Don't push sp.
// Return the number of dwords pushed
-int MacroAssembler::push_fp(unsigned int bitset, Register stack, FpPushPopMode mode) {
+int MacroAssembler::push_fp(FloatRegSet regset, Register stack, FpPushPopMode mode) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
@@ -2942,7 +3104,11 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack, FpPushPopMode m
}
// Return the number of dwords popped
-int MacroAssembler::pop_fp(unsigned int bitset, Register stack, FpPushPopMode mode) {
+int MacroAssembler::pop_fp(FloatRegSet regset, Register stack, FpPushPopMode mode) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
@@ -3052,7 +3218,11 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack, FpPushPopMode mo
}
// Return the number of dwords pushed
-int MacroAssembler::push_p(unsigned int bitset, Register stack) {
+int MacroAssembler::push_p(PRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
bool use_sve = false;
int sve_predicate_size_in_slots = 0;
@@ -3089,7 +3259,11 @@ int MacroAssembler::push_p(unsigned int bitset, Register stack) {
}
// Return the number of dwords popped
-int MacroAssembler::pop_p(unsigned int bitset, Register stack) {
+int MacroAssembler::pop_p(PRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
bool use_sve = false;
int sve_predicate_size_in_slots = 0;
@@ -3128,7 +3302,6 @@ int MacroAssembler::pop_p(unsigned int bitset, Register stack) {
#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
#if 0
- assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
assert (Universe::heap() != nullptr, "java heap should be initialized");
if (!UseCompressedOops || Universe::ptr_base() == nullptr) {
// rheapbase is allocated as general register
@@ -3306,7 +3479,7 @@ void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement
void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
- if (Universe::is_fully_initialized()) {
+ if (Universe::is_fully_initialized() && !AOTCodeCache::is_on_for_dump()) {
mov(rheapbase, CompressedOops::base());
} else {
lea(rheapbase, ExternalAddress(CompressedOops::base_addr()));
@@ -3451,9 +3624,8 @@ extern "C" void findpc(intptr_t x);
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
// In order to get locks to work, we need to fake a in_VM state
- if (ShowMessageBoxOnError ) {
+ if (ShowMessageBoxOnError) {
JavaThread* thread = JavaThread::current();
- JavaThreadState saved_state = thread->thread_state();
thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
@@ -4918,13 +5090,10 @@ void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
void MacroAssembler::load_klass(Register dst, Register src) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(dst, src);
- decode_klass_not_null(dst);
- } else if (UseCompressedClassPointers) {
- ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
- decode_klass_not_null(dst);
} else {
- ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+ ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
+ decode_klass_not_null(dst);
}
void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp1, Register tmp2) {
@@ -4976,25 +5145,22 @@ void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, R
void MacroAssembler::cmp_klass(Register obj, Register klass, Register tmp) {
assert_different_registers(obj, klass, tmp);
- if (UseCompressedClassPointers) {
- if (UseCompactObjectHeaders) {
- load_narrow_klass_compact(tmp, obj);
- } else {
- ldrw(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
- }
- if (CompressedKlassPointers::base() == nullptr) {
- cmp(klass, tmp, LSL, CompressedKlassPointers::shift());
- return;
- } else if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
- && CompressedKlassPointers::shift() == 0) {
- // Only the bottom 32 bits matter
- cmpw(klass, tmp);
- return;
- }
- decode_klass_not_null(tmp);
+ if (UseCompactObjectHeaders) {
+ load_narrow_klass_compact(tmp, obj);
} else {
- ldr(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
+ ldrw(tmp, Address(obj, oopDesc::klass_offset_in_bytes()));
}
+ if (CompressedKlassPointers::base() == nullptr) {
+ cmp(klass, tmp, LSL, CompressedKlassPointers::shift());
+ return;
+ } else if (!AOTCodeCache::is_on_for_dump() &&
+ ((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0
+ && CompressedKlassPointers::shift() == 0) {
+ // Only the bottom 32 bits matter
+ cmpw(klass, tmp);
+ return;
+ }
+ decode_klass_not_null(tmp);
cmp(klass, tmp);
}
@@ -5002,36 +5168,25 @@ void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Regi
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(tmp1, obj1);
load_narrow_klass_compact(tmp2, obj2);
- cmpw(tmp1, tmp2);
- } else if (UseCompressedClassPointers) {
+ } else {
ldrw(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
ldrw(tmp2, Address(obj2, oopDesc::klass_offset_in_bytes()));
- cmpw(tmp1, tmp2);
- } else {
- ldr(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
- ldr(tmp2, Address(obj2, oopDesc::klass_offset_in_bytes()));
- cmp(tmp1, tmp2);
}
+ cmpw(tmp1, tmp2);
}
void MacroAssembler::store_klass(Register dst, Register src) {
// FIXME: Should this be a store release? concurrent gcs assumes
// klass length is valid if klass field is not null.
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- encode_klass_not_null(src);
- strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
- } else {
- str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
- }
+ encode_klass_not_null(src);
+ strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
}
void MacroAssembler::store_klass_gap(Register dst, Register src) {
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- // Store to klass gap in destination
- strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
- }
+ // Store to klass gap in destination
+ strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
}
// Algorithm must match CompressedOops::encode.
@@ -5177,8 +5332,6 @@ MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
}
MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode(address base, int shift, const size_t range) {
- assert(UseCompressedClassPointers, "not using compressed class pointers");
-
// KlassDecodeMode shouldn't be set already.
assert(_klass_decode_mode == KlassDecodeNone, "set once");
@@ -5244,7 +5397,7 @@ void MacroAssembler::encode_klass_not_null_for_aot(Register dst, Register src) {
}
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
- if (AOTCodeCache::is_on_for_dump()) {
+ if (CompressedKlassPointers::base() != nullptr && AOTCodeCache::is_on_for_dump()) {
encode_klass_not_null_for_aot(dst, src);
return;
}
@@ -5308,8 +5461,6 @@ void MacroAssembler::decode_klass_not_null_for_aot(Register dst, Register src) {
}
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
-
if (AOTCodeCache::is_on_for_dump()) {
decode_klass_not_null_for_aot(dst, src);
return;
@@ -5376,7 +5527,6 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
}
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
@@ -5578,7 +5728,6 @@ address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype
}
void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_offset) {
- relocInfo::relocType rtype = dest.rspec().reloc()->type();
uint64_t low_page = (uint64_t)CodeCache::low_bound() >> 12;
uint64_t high_page = (uint64_t)(CodeCache::high_bound()-1) >> 12;
uint64_t dest_page = (uint64_t)dest.target() >> 12;
@@ -5606,12 +5755,33 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, uint64_t &byte_off
}
void MacroAssembler::load_byte_map_base(Register reg) {
- CardTable::CardValue* byte_map_base =
- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ address byte_map_base_adr = AOTRuntimeConstants::card_table_base_address();
+ lea(reg, ExternalAddress(byte_map_base_adr));
+ ldr(reg, Address(reg));
+ return;
+ }
+#endif
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
- // Strictly speaking the byte_map_base isn't an address at all, and it might
+ // Strictly speaking the card table base isn't an address at all, and it might
// even be negative. It is thus materialised as a constant.
- mov(reg, (uint64_t)byte_map_base);
+ mov(reg, (uint64_t)ctbs->card_table_base_const());
+}
+
+void MacroAssembler::load_aotrc_address(Register reg, address a) {
+#if INCLUDE_CDS
+ assert(AOTRuntimeConstants::contains(a), "address out of range for data area");
+ if (AOTCodeCache::is_on_for_dump()) {
+ // all aotrc field addresses should be registered in the AOTCodeCache address table
+ lea(reg, ExternalAddress(a));
+ } else {
+ mov(reg, (uint64_t)a);
+ }
+#else
+ ShouldNotReachHere();
+#endif
}
void MacroAssembler::build_frame(int framesize) {
@@ -5955,7 +6125,6 @@ void MacroAssembler::string_equals(Register a1, Register a2,
Label SAME, DONE, SHORT, NEXT_WORD;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
- Register cnt2 = tmp2; // cnt2 only used in array length compare
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
@@ -6265,10 +6434,14 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
// Intrinsic for
//
-// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
-// return the number of characters copied.
-// - java/lang/StringUTF16.compress
-// return index of non-latin1 character if copy fails, otherwise 'len'.
+// - sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
+// Encodes char[] to byte[] in ISO-8859-1
+//
+// - java.lang.StringCoding#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
+// Encodes byte[] (containing UTF-16) to byte[] in ISO-8859-1
+//
+// - java.lang.StringCoding#encodeAsciiArray0(char[] sa, int sp, byte[] da, int dp, int len)
+// Encodes char[] to byte[] in ASCII
//
// This version always returns the number of characters copied, and does not
// clobber the 'len' register. A successful copy will complete with the post-
@@ -6663,6 +6836,9 @@ void MacroAssembler::spin_wait() {
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
sb();
break;
+ case SpinWait::WFET:
+ spin_wait_wfet(VM_Version::spin_wait_desc().delay());
+ break;
default:
ShouldNotReachHere();
}
@@ -6670,6 +6846,28 @@ void MacroAssembler::spin_wait() {
block_comment("}");
}
+void MacroAssembler::spin_wait_wfet(int delay_ns) {
+ // The sequence assumes CNTFRQ_EL0 is fixed to 1GHz. The assumption is valid
+ // starting from Armv8.6, according to the "D12.1.2 The system counter" of the
+ // Arm Architecture Reference Manual for A-profile architecture version M.a.a.
+ // This is sufficient because FEAT_WFXT is introduced from Armv8.6.
+ Register target = rscratch1;
+ Register current = rscratch2;
+ get_cntvctss_el0(current);
+ add(target, current, delay_ns);
+
+ Label L_wait_loop;
+ bind(L_wait_loop);
+
+ wfet(target);
+ get_cntvctss_el0(current);
+
+ cmp(current, target);
+ br(LT, L_wait_loop);
+
+ sb();
+}
+
// Stack frame creation/removal
void MacroAssembler::enter(bool strip_ret_addr) {
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 4baa07d7d49..a6cc862d05c 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -499,29 +499,20 @@ private:
void mov_immediate64(Register dst, uint64_t imm64);
void mov_immediate32(Register dst, uint32_t imm32);
- int push(unsigned int bitset, Register stack);
- int pop(unsigned int bitset, Register stack);
-
- int push_fp(unsigned int bitset, Register stack, FpPushPopMode mode);
- int pop_fp(unsigned int bitset, Register stack, FpPushPopMode mode);
-
- int push_p(unsigned int bitset, Register stack);
- int pop_p(unsigned int bitset, Register stack);
-
void mov(Register dst, Address a);
public:
- void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
- void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
+ int push(RegSet regset, Register stack);
+ int pop(RegSet regset, Register stack);
- void push_fp(FloatRegSet regs, Register stack, FpPushPopMode mode = PushPopFull) { if (regs.bits()) push_fp(regs.bits(), stack, mode); }
- void pop_fp(FloatRegSet regs, Register stack, FpPushPopMode mode = PushPopFull) { if (regs.bits()) pop_fp(regs.bits(), stack, mode); }
+ int push_fp(FloatRegSet regset, Register stack, FpPushPopMode mode = PushPopFull);
+ int pop_fp(FloatRegSet regset, Register stack, FpPushPopMode mode = PushPopFull);
static RegSet call_clobbered_gp_registers();
- void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); }
- void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); }
+ int push_p(PRegSet regset, Register stack);
+ int pop_p(PRegSet regset, Register stack);
// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
@@ -660,6 +651,14 @@ public:
msr(0b011, 0b0100, 0b0010, 0b000, reg);
}
+ // CNTVCTSS_EL0: op1 == 011
+ // CRn == 1110
+ // CRm == 0000
+ // op2 == 110
+ inline void get_cntvctss_el0(Register reg) {
+ mrs(0b011, 0b1110, 0b0000, 0b110, reg);
+ }
+
// idiv variant which deals with MINLONG as dividend and -1 as divisor
int corrected_idivl(Register result, Register ra, Register rb,
bool want_remainder, Register tmp = rscratch1);
@@ -678,7 +677,6 @@ public:
static bool uses_implicit_null_check(void* address);
static address target_addr_for_insn(address insn_addr);
- static address target_addr_for_insn_or_null(address insn_addr);
// Required platform-specific helpers for Label::patch_instructions.
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
@@ -892,10 +890,6 @@ public:
// thread in the default location (rthread)
void reset_last_Java_frame(bool clear_fp);
- // Stores
- void store_check(Register obj); // store check for obj - register is destroyed afterwards
- void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)
-
void resolve_jobject(Register value, Register tmp1, Register tmp2);
void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
@@ -1122,6 +1116,8 @@ public:
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+ void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
+
void verify_sve_vector_length(Register tmp = rscratch1);
void reinitialize_ptrue() {
if (UseSVE > 0) {
@@ -1475,6 +1471,9 @@ public:
// Load the base of the cardtable byte map into reg.
void load_byte_map_base(Register reg);
+ // Load a constant address in the AOT Runtime Constants area
+ void load_aotrc_address(Register reg, address a);
+
// Prolog generator routines to support switch between x86 code and
// generated ARM code
@@ -1720,6 +1719,7 @@ public:
// Code for java.lang.Thread::onSpinWait() intrinsic.
void spin_wait();
+ void spin_wait_wfet(int delay_ns);
void fast_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow);
void fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow);
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
index f2003dd9b55..8b76b96d345 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -133,7 +133,6 @@ void NativeMovConstReg::verify() {
intptr_t NativeMovConstReg::data() const {
- // das(uint64_t(instruction_address()),2);
address addr = MacroAssembler::target_addr_for_insn(instruction_address());
if (maybe_cpool_ref(instruction_address())) {
return *(intptr_t*)addr;
@@ -144,6 +143,7 @@ intptr_t NativeMovConstReg::data() const {
void NativeMovConstReg::set_data(intptr_t x) {
if (maybe_cpool_ref(instruction_address())) {
+ MACOS_AARCH64_ONLY(os::thread_wx_enable_write());
address addr = MacroAssembler::target_addr_for_insn(instruction_address());
*(intptr_t*)addr = x;
} else {
@@ -192,7 +192,6 @@ int NativeMovRegMem::offset() const {
void NativeMovRegMem::set_offset(int x) {
address pc = instruction_address();
- unsigned insn = *(unsigned*)pc;
if (maybe_cpool_ref(pc)) {
address addr = MacroAssembler::target_addr_for_insn(pc);
*(int64_t*)addr = x;
@@ -204,7 +203,7 @@ void NativeMovRegMem::set_offset(int x) {
void NativeMovRegMem::verify() {
#ifdef ASSERT
- address dest = MacroAssembler::target_addr_for_insn_or_null(instruction_address());
+ MacroAssembler::target_addr_for_insn(instruction_address());
#endif
}
@@ -213,7 +212,7 @@ void NativeMovRegMem::verify() {
void NativeJump::verify() { ; }
address NativeJump::jump_destination() const {
- address dest = MacroAssembler::target_addr_for_insn_or_null(instruction_address());
+ address dest = MacroAssembler::target_addr_for_insn(instruction_address());
// We use jump to self as the unresolved address which the inline
// cache code (and relocs) know about
@@ -350,8 +349,6 @@ bool NativeInstruction::is_stop() {
//-------------------------------------------------------------------
-void NativeGeneralJump::verify() { }
-
// MT-safe patching of a long jump instruction.
void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
ShouldNotCallThis();
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index c30cb911d96..ab9896fa426 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -78,7 +78,6 @@ public:
inline bool is_nop() const;
bool is_jump();
bool is_general_jump();
- inline bool is_jump_or_nop();
inline bool is_cond_jump();
bool is_safepoint_poll();
bool is_movz();
@@ -90,16 +89,18 @@ protected:
s_char sbyte_at(int offset) const { return *(s_char*)addr_at(offset); }
u_char ubyte_at(int offset) const { return *(u_char*)addr_at(offset); }
- jint int_at(int offset) const { return *(jint*)addr_at(offset); }
- juint uint_at(int offset) const { return *(juint*)addr_at(offset); }
- address ptr_at(int offset) const { return *(address*)addr_at(offset); }
- oop oop_at(int offset) const { return *(oop*)addr_at(offset); }
+ jint int_at(int offset) const { return *(jint*)addr_at(offset); }
+ juint uint_at(int offset) const { return *(juint*)addr_at(offset); }
+ address ptr_at(int offset) const { return *(address*)addr_at(offset); }
+ oop oop_at(int offset) const { return *(oop*)addr_at(offset); }
- void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; }
- void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; }
- void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; }
- void set_ptr_at(int offset, address ptr) { *(address*)addr_at(offset) = ptr; }
- void set_oop_at(int offset, oop o) { *(oop*)addr_at(offset) = o; }
+#define MACOS_WX_WRITE MACOS_AARCH64_ONLY(os::thread_wx_enable_write())
+ void set_char_at(int offset, char c) { MACOS_WX_WRITE; *addr_at(offset) = (u_char)c; }
+ void set_int_at(int offset, jint i) { MACOS_WX_WRITE; *(jint*)addr_at(offset) = i; }
+ void set_uint_at(int offset, juint i) { MACOS_WX_WRITE; *(juint*)addr_at(offset) = i; }
+ void set_ptr_at(int offset, address ptr) { MACOS_WX_WRITE; *(address*)addr_at(offset) = ptr; }
+ void set_oop_at(int offset, oop o) { MACOS_WX_WRITE; *(oop*)addr_at(offset) = o; }
+#undef MACOS_WX_WRITE
void wrote(int offset);
@@ -177,13 +178,11 @@ public:
address destination() const;
void set_destination(address dest) {
- int offset = dest - instruction_address();
- unsigned int insn = 0b100101 << 26;
+ int64_t offset = dest - instruction_address();
+ juint insn = 0b100101u << 26u;
assert((offset & 3) == 0, "should be");
- offset >>= 2;
- offset &= (1 << 26) - 1; // mask off insn part
- insn |= offset;
- set_int_at(displacement_offset, insn);
+ Instruction_aarch64::spatch(reinterpret_cast(&insn), 25, 0, offset >> 2);
+ set_uint_at(displacement_offset, insn);
}
void verify_alignment() { ; }
@@ -380,7 +379,6 @@ public:
void set_jump_destination(address dest);
static void replace_mt_safe(address instr_addr, address code_buffer);
- static void verify();
};
inline NativeGeneralJump* nativeGeneralJump_at(address address) {
@@ -419,10 +417,6 @@ inline bool NativeInstruction::is_jump() {
return false;
}
-inline bool NativeInstruction::is_jump_or_nop() {
- return is_nop() || is_jump();
-}
-
// Call trampoline stubs.
class NativeCallTrampolineStub : public NativeInstruction {
public:
diff --git a/src/hotspot/cpu/aarch64/relocInfo_aarch64.cpp b/src/hotspot/cpu/aarch64/relocInfo_aarch64.cpp
index dbec2d76d4f..f1b9fb213a2 100644
--- a/src/hotspot/cpu/aarch64/relocInfo_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/relocInfo_aarch64.cpp
@@ -54,7 +54,12 @@ void Relocation::pd_set_data_value(address x, bool verify_only) {
bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
break;
}
- ICache::invalidate_range(addr(), bytes);
+
+ if (UseSingleICacheInvalidation) {
+ assert(_binding != nullptr, "expect to be called with RelocIterator in use");
+ } else {
+ ICache::invalidate_range(addr(), bytes);
+ }
}
address Relocation::pd_call_destination(address orig_addr) {
diff --git a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
index e36aa21b567..638e57b03fe 100644
--- a/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/runtime_aarch64.cpp
@@ -290,7 +290,7 @@ ExceptionBlob* OptoRuntime::generate_exception_blob() {
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
const char* name = OptoRuntime::stub_name(StubId::c2_exception_id);
- CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, (uint)BlobId::c2_exception_id, name);
+ CodeBlob* blob = AOTCodeCache::load_code_blob(AOTCodeEntry::C2Blob, BlobId::c2_exception_id);
if (blob != nullptr) {
return blob->as_exception_blob();
}
diff --git a/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp b/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp
index 7da0151d834..97a981ab815 100644
--- a/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp
@@ -32,6 +32,7 @@ bool SpinWait::supports(const char *name) {
strcmp(name, "isb") == 0 ||
strcmp(name, "yield") == 0 ||
strcmp(name, "sb") == 0 ||
+ strcmp(name, "wfet") == 0 ||
strcmp(name, "none") == 0);
}
@@ -46,6 +47,8 @@ SpinWait::Inst SpinWait::from_name(const char* name) {
return SpinWait::YIELD;
} else if (strcmp(name, "sb") == 0) {
return SpinWait::SB;
+ } else if (strcmp(name, "wfet") == 0) {
+ return SpinWait::WFET;
}
return SpinWait::NONE;
diff --git a/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp
index 0e96a4b7157..6ebcd2477a8 100644
--- a/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp
@@ -24,6 +24,8 @@
#ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
#define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
+#include "utilities/debug.hpp"
+
class SpinWait {
public:
enum Inst {
@@ -31,21 +33,30 @@ public:
NOP,
ISB,
YIELD,
- SB
+ SB,
+ WFET
};
private:
Inst _inst;
int _count;
+ int _delay;
Inst from_name(const char *name);
public:
- SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(inst == NONE ? 0 : count) {}
- SpinWait(const char *name, int count) : SpinWait(from_name(name), count) {}
+ SpinWait(Inst inst = NONE, int count = 0, int delay = -1)
+ : _inst(inst), _count(inst == NONE ? 0 : count), _delay(delay) {}
+ SpinWait(const char *name, int count, int delay)
+ : SpinWait(from_name(name), count, delay) {}
Inst inst() const { return _inst; }
int inst_count() const { return _count; }
+ int delay() const {
+ assert(_inst == WFET, "Specifying the delay value is only supported for WFET");
+ assert(_delay > 0, "The delay value must be positive");
+ return _delay;
+ }
static bool supports(const char *name);
};
diff --git a/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp b/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp
index 695534604b8..d1f59e479db 100644
--- a/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubDeclarations_aarch64.hpp
@@ -29,32 +29,39 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 0) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 10000) \
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 2000) \
+// count needed for declaration of vector_iota_indices stub
+#define VECTOR_IOTA_COUNT 6
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 70000) \
do_stub(compiler, vector_iota_indices) \
- do_arch_entry(aarch64, compiler, vector_iota_indices, \
- vector_iota_indices, vector_iota_indices) \
+ do_arch_entry_array(aarch64, compiler, vector_iota_indices, \
+ vector_iota_indices, vector_iota_indices, \
+ VECTOR_IOTA_COUNT) \
do_stub(compiler, large_array_equals) \
do_arch_entry(aarch64, compiler, large_array_equals, \
large_array_equals, large_array_equals) \
@@ -84,8 +91,7 @@
do_stub(compiler, count_positives) \
do_arch_entry(aarch64, compiler, count_positives, count_positives, \
count_positives) \
- do_stub(compiler, count_positives_long) \
- do_arch_entry(aarch64, compiler, count_positives_long, \
+ do_arch_entry(aarch64, compiler, count_positives, \
count_positives_long, count_positives_long) \
do_stub(compiler, compare_long_string_LL) \
do_arch_entry(aarch64, compiler, compare_long_string_LL, \
@@ -108,14 +114,16 @@
do_stub(compiler, string_indexof_linear_ul) \
do_arch_entry(aarch64, compiler, string_indexof_linear_ul, \
string_indexof_linear_ul, string_indexof_linear_ul) \
- /* this uses the entry for ghash_processBlocks */ \
- do_stub(compiler, ghash_processBlocks_wide) \
+ do_stub(compiler, ghash_processBlocks_small) \
+ do_arch_entry(aarch64, compiler, ghash_processBlocks_small, \
+ ghash_processBlocks_small, ghash_processBlocks_small) \
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 20000 ZGC_ONLY(+85000)) \
do_stub(final, copy_byte_f) \
do_arch_entry(aarch64, final, copy_byte_f, copy_byte_f, \
@@ -139,9 +147,49 @@
do_stub(final, spin_wait) \
do_arch_entry_init(aarch64, final, spin_wait, spin_wait, \
spin_wait, empty_spin_wait) \
- /* stub only -- entries are not stored in StubRoutines::aarch64 */ \
/* n.b. these are not the same as the generic atomic stubs */ \
do_stub(final, atomic_entry_points) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_fetch_add_4_impl, atomic_fetch_add_4_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_fetch_add_8_impl, atomic_fetch_add_8_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_fetch_add_4_relaxed_impl, \
+ atomic_fetch_add_4_relaxed_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_fetch_add_8_relaxed_impl, \
+ atomic_fetch_add_8_relaxed_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_xchg_4_impl, atomic_xchg_4_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_xchg_8_impl, atomic_xchg_8_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_1_impl, atomic_cmpxchg_1_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_4_impl, atomic_cmpxchg_4_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_8_impl, atomic_cmpxchg_8_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_1_relaxed_impl, \
+ atomic_cmpxchg_1_relaxed_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_4_relaxed_impl, \
+ atomic_cmpxchg_4_relaxed_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_8_relaxed_impl, \
+ atomic_cmpxchg_8_relaxed_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_4_release_impl, \
+ atomic_cmpxchg_4_release_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_8_release_impl, \
+ atomic_cmpxchg_8_release_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_4_seq_cst_impl, \
+ atomic_cmpxchg_4_seq_cst_impl) \
+ do_arch_entry(aarch64, final, atomic_entry_points, \
+ atomic_cmpxchg_8_seq_cst_impl, \
+ atomic_cmpxchg_8_seq_cst_impl) \
#endif // CPU_AARCH64_STUBDECLARATIONS_HPP
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 7e2f333ba40..fddb37b7b8d 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -79,6 +79,166 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+// Constant data definitions
+
+static const uint32_t _sha256_round_consts[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+static const uint64_t _sha512_round_consts[80] = {
+ 0x428A2F98D728AE22L, 0x7137449123EF65CDL, 0xB5C0FBCFEC4D3B2FL,
+ 0xE9B5DBA58189DBBCL, 0x3956C25BF348B538L, 0x59F111F1B605D019L,
+ 0x923F82A4AF194F9BL, 0xAB1C5ED5DA6D8118L, 0xD807AA98A3030242L,
+ 0x12835B0145706FBEL, 0x243185BE4EE4B28CL, 0x550C7DC3D5FFB4E2L,
+ 0x72BE5D74F27B896FL, 0x80DEB1FE3B1696B1L, 0x9BDC06A725C71235L,
+ 0xC19BF174CF692694L, 0xE49B69C19EF14AD2L, 0xEFBE4786384F25E3L,
+ 0x0FC19DC68B8CD5B5L, 0x240CA1CC77AC9C65L, 0x2DE92C6F592B0275L,
+ 0x4A7484AA6EA6E483L, 0x5CB0A9DCBD41FBD4L, 0x76F988DA831153B5L,
+ 0x983E5152EE66DFABL, 0xA831C66D2DB43210L, 0xB00327C898FB213FL,
+ 0xBF597FC7BEEF0EE4L, 0xC6E00BF33DA88FC2L, 0xD5A79147930AA725L,
+ 0x06CA6351E003826FL, 0x142929670A0E6E70L, 0x27B70A8546D22FFCL,
+ 0x2E1B21385C26C926L, 0x4D2C6DFC5AC42AEDL, 0x53380D139D95B3DFL,
+ 0x650A73548BAF63DEL, 0x766A0ABB3C77B2A8L, 0x81C2C92E47EDAEE6L,
+ 0x92722C851482353BL, 0xA2BFE8A14CF10364L, 0xA81A664BBC423001L,
+ 0xC24B8B70D0F89791L, 0xC76C51A30654BE30L, 0xD192E819D6EF5218L,
+ 0xD69906245565A910L, 0xF40E35855771202AL, 0x106AA07032BBD1B8L,
+ 0x19A4C116B8D2D0C8L, 0x1E376C085141AB53L, 0x2748774CDF8EEB99L,
+ 0x34B0BCB5E19B48A8L, 0x391C0CB3C5C95A63L, 0x4ED8AA4AE3418ACBL,
+ 0x5B9CCA4F7763E373L, 0x682E6FF3D6B2B8A3L, 0x748F82EE5DEFB2FCL,
+ 0x78A5636F43172F60L, 0x84C87814A1F0AB72L, 0x8CC702081A6439ECL,
+ 0x90BEFFFA23631E28L, 0xA4506CEBDE82BDE9L, 0xBEF9A3F7B2C67915L,
+ 0xC67178F2E372532BL, 0xCA273ECEEA26619CL, 0xD186B8C721C0C207L,
+ 0xEADA7DD6CDE0EB1EL, 0xF57D4F7FEE6ED178L, 0x06F067AA72176FBAL,
+ 0x0A637DC5A2C898A6L, 0x113F9804BEF90DAEL, 0x1B710B35131C471BL,
+ 0x28DB77F523047D84L, 0x32CAAB7B40C72493L, 0x3C9EBE0A15C9BEBCL,
+ 0x431D67C49C100D4CL, 0x4CC5D4BECB3E42B6L, 0x597F299CFC657E2AL,
+ 0x5FCB6FAB3AD6FAECL, 0x6C44198C4A475817L
+};
+
+static const uint64_t _sha3_round_consts[24] = {
+ 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
+ 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
+ 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
+ 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
+ 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
+ 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
+ 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
+ 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
+};
+
+static const uint64_t _double_keccak_round_consts[24] = {
+ 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
+ 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
+ 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
+ 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
+ 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
+ 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
+ 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
+ 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
+};
+
+static const char _encodeBlock_toBase64[64] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+};
+
+static const char _encodeBlock_toBase64URL[64] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
+};
+
+// Non-SIMD lookup tables are mostly dumped from fromBase64 array used in java.util.Base64,
+// except the trailing character '=' is also treated illegal value in this intrinsic. That
+// is java.util.Base64.fromBase64['='] = -2, while fromBase(URL)64ForNoSIMD['='] = 255 here.
+static const uint8_t _decodeBlock_fromBase64ForNoSIMD[256] = {
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u, 255u, 63u,
+ 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u,
+ 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u, 40u,
+ 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+};
+
+static const uint8_t _decodeBlock_fromBase64URLForNoSIMD[256] = {
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u,
+ 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u,
+ 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u, 63u,
+ 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u, 40u,
+ 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+};
+
+// A legal value of base64 code is in range [0, 127]. We need two lookups
+// with tbl/tbx and combine them to get the decode data. The 1st table vector
+// lookup use tbl, out of range indices are set to 0 in destination. The 2nd
+// table vector lookup use tbx, out of range indices are unchanged in
+// destination. Input [64..126] is mapped to index [65, 127] in second lookup.
+// The value of index 64 is set to 0, so that we know that we already get the
+// decoded data with the 1st lookup.
+static const uint8_t _decodeBlock_fromBase64ForSIMD[128] = {
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u, 255u, 63u,
+ 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 0u, 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u,
+ 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u,
+ 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u,
+};
+
+static const uint8_t _decodeBlock_fromBase64URLForSIMD[128] = {
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u,
+ 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
+ 0u, 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u,
+ 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u,
+ 63u, 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u,
+ 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u,
+};
+
+
// Stub Code definitions
class StubGenerator: public StubCodeGenerator {
@@ -203,8 +363,17 @@ class StubGenerator: public StubCodeGenerator {
"adjust this code");
StubId stub_id = StubId::stubgen_call_stub_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 2, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == 1, "expected 1 extra entry");
+ return_address = entries.at(0);
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Address sp_after_call (rfp, sp_after_call_off * wordSize);
@@ -323,6 +492,7 @@ class StubGenerator: public StubCodeGenerator {
// save current address for use by exception handling code
return_address = __ pc();
+ entries.append(return_address);
// store result depending on type (everything that is not
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
@@ -406,6 +576,9 @@ class StubGenerator: public StubCodeGenerator {
__ strd(j_farg0, Address(j_rarg2, 0));
__ br(Assembler::AL, exit);
+ // record the stub entry and end plus the auxiliary entry
+ store_archive_data(stub_id, start, __ pc(), &entries);
+
return start;
}
@@ -423,8 +596,14 @@ class StubGenerator: public StubCodeGenerator {
address generate_catch_exception() {
StubId stub_id = StubId::stubgen_catch_exception_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// same as in generate_call_stub():
const Address sp_after_call(rfp, sp_after_call_off * wordSize);
@@ -450,7 +629,9 @@ class StubGenerator: public StubCodeGenerator {
__ verify_oop(r0);
__ str(r0, Address(rthread, Thread::pending_exception_offset()));
- __ mov(rscratch1, (address)__FILE__);
+ // special case -- add file name string to AOT address table
+ address file = (address)AOTCodeCache::add_C_string(__FILE__);
+ __ lea(rscratch1, ExternalAddress(file));
__ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
__ movw(rscratch1, (int)__LINE__);
__ strw(rscratch1, Address(rthread, Thread::exception_line_offset()));
@@ -458,7 +639,10 @@ class StubGenerator: public StubCodeGenerator {
// complete return to VM
assert(StubRoutines::_call_stub_return_address != nullptr,
"_call_stub_return_address must have been generated before");
- __ b(StubRoutines::_call_stub_return_address);
+ __ b(RuntimeAddress(StubRoutines::_call_stub_return_address));
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
return start;
}
@@ -479,8 +663,14 @@ class StubGenerator: public StubCodeGenerator {
address generate_forward_exception() {
StubId stub_id = StubId::stubgen_forward_exception_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Upon entry, LR points to the return address returning into
// Java (interpreted or compiled) code; i.e., the return address
@@ -551,6 +741,9 @@ class StubGenerator: public StubCodeGenerator {
__ verify_oop(r0);
__ br(r19);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -569,8 +762,14 @@ class StubGenerator: public StubCodeGenerator {
// [tos + 5]: saved rscratch1
address generate_verify_oop() {
StubId stub_id = StubId::stubgen_verify_oop_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label exit, error;
@@ -613,33 +812,64 @@ class StubGenerator: public StubCodeGenerator {
__ blr(rscratch1);
__ hlt(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// Generate indices for iota vector.
- address generate_iota_indices(StubId stub_id) {
+ void generate_iota_indices(StubId stub_id) {
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == VECTOR_IOTA_COUNT, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == entry_count - 1,
+ "unexpected entries count %d", entries.length());
+ StubRoutines::aarch64::_vector_iota_indices[0] = start;
+ for (int i = 1; i < VECTOR_IOTA_COUNT; i++) {
+ StubRoutines::aarch64::_vector_iota_indices[i] = entries.at(i - 1);
+ }
+ return;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
+ entries.append(__ pc());
// H
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
+ entries.append(__ pc());
// S
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
+ entries.append(__ pc());
// D
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
+ entries.append(__ pc());
// S - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
+ entries.append(__ pc());
// D - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
- return start;
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc(), &entries);
+
+ // install the entry addresses in the entry array
+ assert(entries.length() == entry_count - 1,
+ "unexpected entries count %d", entries.length());
+ StubRoutines::aarch64::_vector_iota_indices[0] = start;
+ for (int i = 1; i < VECTOR_IOTA_COUNT; i++) {
+ StubRoutines::aarch64::_vector_iota_indices[i] = entries.at(i - 1);
+ }
}
// The inner part of zero_words(). This is the bulk operation,
@@ -656,15 +886,21 @@ class StubGenerator: public StubCodeGenerator {
// r11 < MacroAssembler::zero_words_block_size.
address generate_zero_blocks() {
+ StubId stub_id = StubId::stubgen_zero_blocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, stub_id);
Label done;
Label base_aligned;
Register base = r10, cnt = r11;
- __ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_zero_blocks_id;
- StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
if (UseBlockZeroing) {
int zva_length = VM_Version::zva_length();
@@ -707,6 +943,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -803,6 +1042,12 @@ class StubGenerator: public StubCodeGenerator {
// s and d are adjusted to point to the remaining words to copy
//
address generate_copy_longs(StubId stub_id, DecoratorSet decorators, Register s, Register d, Register count) {
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
BasicType type;
copy_direction direction;
@@ -854,7 +1099,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label unaligned_copy_long;
if (AvoidUnalignedAccesses) {
@@ -1154,6 +1399,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
}
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1445,19 +1693,25 @@ class StubGenerator: public StubCodeGenerator {
}
if (direction == copy_forwards) {
if (type != T_OBJECT) {
- __ bl(StubRoutines::aarch64::copy_byte_f());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_byte_f()));
+ __ blr(rscratch1);
} else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
- __ bl(StubRoutines::aarch64::copy_oop_uninit_f());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_oop_uninit_f()));
+ __ blr(rscratch1);
} else {
- __ bl(StubRoutines::aarch64::copy_oop_f());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_oop_f()));
+ __ blr(rscratch1);
}
} else {
if (type != T_OBJECT) {
- __ bl(StubRoutines::aarch64::copy_byte_b());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_byte_b()));
+ __ blr(rscratch1);
} else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
- __ bl(StubRoutines::aarch64::copy_oop_uninit_b());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_oop_uninit_b()));
+ __ blr(rscratch1);
} else {
- __ bl(StubRoutines::aarch64::copy_oop_b());
+ __ lea(rscratch1, RuntimeAddress(StubRoutines::aarch64::copy_oop_b()));
+ __ blr(rscratch1);
}
}
@@ -1508,8 +1762,8 @@ class StubGenerator: public StubCodeGenerator {
// stub_id - is used to name the stub and identify all details of
// how to perform the copy.
//
- // entry - is assigned to the stub's post push entry point unless
- // it is null
+ // nopush_entry - is assigned to the stub's post push entry point
+ // unless it is null
//
// Inputs:
// c_rarg0 - source array address
@@ -1525,8 +1779,6 @@ class StubGenerator: public StubCodeGenerator {
// copy method
//
address generate_disjoint_copy(StubId stub_id, address *nopush_entry) {
- Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
- RegSet saved_reg = RegSet::of(s, d, count);
int size;
bool aligned;
bool is_oop;
@@ -1607,17 +1859,45 @@ class StubGenerator: public StubCodeGenerator {
ShouldNotReachHere();
break;
}
+ // all stubs provide a 2nd entry which omits the frame push for
+ // use when bailing out from a conjoint copy. However we may also
+   // need some extra addresses for memory access protection.
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 2, "sanity check");
+ assert(nopush_entry != nullptr, "all disjoint copy stubs export a nopush entry");
+
+ bool add_extras = !is_oop && (!aligned || sizeof(jlong) == size);
+ int extra_count = ((add_extras ? 1 : 0) * UnsafeMemoryAccess::COLUMN_COUNT);
+ GrowableArray entries;
+ GrowableArray extras;
+ GrowableArray *extras_ptr = (extra_count > 0 ? &extras : nullptr);
+ address start = load_archive_data(stub_id, &entries, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == entry_count - 1,
+ "unexpected entries count %d", entries.length());
+ *nopush_entry = entries.at(0);
+ assert(extras.length() == extra_count,
+ "unexpected extra count %d", extras.length());
+ if (add_extras) {
+ // register one handler at offset 0
+ register_unsafe_access_handlers(extras, 0, 1);
+ }
+ return start;
+ }
+
+ Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ RegSet saved_reg = RegSet::of(s, d, count);
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
- if (nopush_entry != nullptr) {
- *nopush_entry = __ pc();
- // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
- BLOCK_COMMENT("Entry:");
- }
+ *nopush_entry = __ pc();
+ entries.append(*nopush_entry);
+
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Post-Push Entry:");
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
if (dest_uninitialized) {
@@ -1636,8 +1916,7 @@ class StubGenerator: public StubCodeGenerator {
}
{
// UnsafeMemoryAccess page error: continue after unsafe access
- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
- UnsafeMemoryAccessMark umam(this, add_entry, true);
+ UnsafeMemoryAccessMark umam(this, add_extras, true);
copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, size);
}
@@ -1652,6 +1931,20 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ mov(r0, zr); // return 0
__ ret(lr);
+
+ address end = __ pc();
+
+ if (add_extras) {
+ // retrieve the registered handler addresses
+ retrieve_unsafe_access_handlers(start, end, extras);
+      assert(extras.length() == extra_count,
+             "incorrect handlers count %d", extras.length());
+ }
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, &entries, extras_ptr);
+
return start;
}
@@ -1663,8 +1956,8 @@ class StubGenerator: public StubCodeGenerator {
// corresponding disjoint copy routine which can be
// jumped to if the ranges do not actually overlap
//
- // entry - is assigned to the stub's post push entry point unless
- // it is null
+ // nopush_entry - is assigned to the stub's post push entry point
+ // unless it is null
//
//
// Inputs:
@@ -1681,8 +1974,6 @@ class StubGenerator: public StubCodeGenerator {
// used by some other conjoint copy method
//
address generate_conjoint_copy(StubId stub_id, address nooverlap_target, address *nopush_entry) {
- Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
- RegSet saved_regs = RegSet::of(s, d, count);
int size;
bool aligned;
bool is_oop;
@@ -1762,15 +2053,47 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
+ // only some conjoint stubs generate a 2nd entry
+ int entry_count = StubInfo::entry_count(stub_id);
+ int expected_entry_count = (nopush_entry == nullptr ? 1 : 2);
+ assert(entry_count == expected_entry_count,
+ "expected entry count %d does not match declared entry count %d for stub %s",
+ expected_entry_count, entry_count, StubInfo::name(stub_id));
+ // We need to protect memory accesses in certain cases
+ bool add_extras = !is_oop && (!aligned || sizeof(jlong) == size);
+ int extra_count = ((add_extras ? 1 : 0) * UnsafeMemoryAccess::COLUMN_COUNT);
+ GrowableArray entries;
+ GrowableArray extras;
+ GrowableArray *entries_ptr = (nopush_entry != nullptr ? &entries : nullptr);
+ GrowableArray *extras_ptr = (extra_count > 0 ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entries count %d", entries.length());
+ assert(extras.length() == extra_count,
+ "unexpected extra count %d", extras.length());
+ if (nopush_entry != nullptr) {
+ *nopush_entry = entries.at(0);
+ }
+ if (add_extras) {
+ // register one handler at offset 0
+ register_unsafe_access_handlers(extras, 0, 1);
+ }
+ return start;
+ }
+
+ Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ RegSet saved_regs = RegSet::of(s, d, count);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
if (nopush_entry != nullptr) {
*nopush_entry = __ pc();
+ entries.append(*nopush_entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
- BLOCK_COMMENT("Entry:");
+ BLOCK_COMMENT("Post-Push Entry:");
}
// use fwd copy when (d-s) above_equal (count*size)
@@ -1798,8 +2121,7 @@ class StubGenerator: public StubCodeGenerator {
}
{
// UnsafeMemoryAccess page error: continue after unsafe access
- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
- UnsafeMemoryAccessMark umam(this, add_entry, true);
+ UnsafeMemoryAccessMark umam(this, add_extras, true);
copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, -size);
}
if (is_oop) {
@@ -1811,6 +2133,23 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ mov(r0, zr); // return 0
__ ret(lr);
+
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entries count %d", entries.length());
+
+ address end = __ pc();
+
+ if (add_extras) {
+ // retrieve the registered handler addresses
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == extra_count,
+ "incorrect handlers count %d", extras.length());
+ }
+
+ // record the stub entry and end plus any no_push entry and/or
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -1864,6 +2203,27 @@ class StubGenerator: public StubCodeGenerator {
ShouldNotReachHere();
}
+ // The normal stub provides a 2nd entry which omits the frame push
+ // for use when bailing out from a disjoint copy.
+ // Only some conjoint stubs generate a 2nd entry
+ int entry_count = StubInfo::entry_count(stub_id);
+ int expected_entry_count = (nopush_entry == nullptr ? 1 : 2);
+ GrowableArray entries;
+ GrowableArray *entries_ptr = (expected_entry_count == 1 ? nullptr : &entries);
+ assert(entry_count == expected_entry_count,
+ "expected entry count %d does not match declared entry count %d for stub %s",
+ expected_entry_count, entry_count, StubInfo::name(stub_id));
+ address start = load_archive_data(stub_id, entries_ptr);
+ if (start != nullptr) {
+ assert(entries.length() + 1 == expected_entry_count,
+ "expected entry count %d does not match return entry count %d for stub %s",
+ expected_entry_count, entries.length() + 1, StubInfo::name(stub_id));
+ if (nopush_entry != nullptr) {
+ *nopush_entry = entries.at(0);
+ }
+ return start;
+ }
+
Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
// Input registers (after setup_arg_regs)
@@ -1896,7 +2256,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -1913,6 +2273,7 @@ class StubGenerator: public StubCodeGenerator {
// Caller of this entry point must set up the argument registers.
if (nopush_entry != nullptr) {
*nopush_entry = __ pc();
+ entries.append(*nopush_entry);
BLOCK_COMMENT("Entry:");
}
@@ -2010,6 +2371,8 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end plus any no_push entry
+    store_archive_data(stub_id, start, __ pc(), entries_ptr);
return start;
}
@@ -2072,13 +2435,18 @@ class StubGenerator: public StubCodeGenerator {
address int_copy_entry,
address long_copy_entry) {
StubId stub_id = StubId::stubgen_unsafe_arraycopy_id;
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
Label L_long_aligned, L_int_aligned, L_short_aligned;
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
// bump this on entry, not on exit:
@@ -2104,6 +2472,9 @@ class StubGenerator: public StubCodeGenerator {
__ lsr(count, count, LogBytesPerLong); // size => long_count
__ b(RuntimeAddress(long_copy_entry));
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2125,7 +2496,12 @@ class StubGenerator: public StubCodeGenerator {
address int_copy_entry, address oop_copy_entry,
address long_copy_entry, address checkcast_copy_entry) {
StubId stub_id = StubId::stubgen_generic_arraycopy_id;
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
Label L_failed, L_objArray;
Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
@@ -2144,7 +2520,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -2383,6 +2759,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2427,10 +2806,15 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
};
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
BLOCK_COMMENT("Entry:");
@@ -2563,15 +2947,32 @@ class StubGenerator: public StubCodeGenerator {
__ bind(L_exit2);
__ leave();
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address generate_unsafecopy_common_error_exit() {
- address start_pc = __ pc();
+ StubId stub_id = StubId::stubgen_unsafecopy_common_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
__ leave();
__ mov(r0, 0);
__ ret(lr);
- return start_pc;
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
+ return start;
}
//
@@ -2589,13 +2990,28 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - byte value
//
address generate_unsafe_setmemory() {
+ StubId stub_id = StubId::stubgen_unsafe_setmemory_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+    // we expect one set of extra unsafe memory access handler entries
+ GrowableArray extras;
+ int extra_count = 1 * UnsafeMemoryAccess::COLUMN_COUNT;
+ address start = load_archive_data(stub_id, nullptr, &extras);
+ if (start != nullptr) {
+ assert(extras.length() == extra_count,
+ "unexpected extra entry count %d", extras.length());
+ register_unsafe_access_handlers(extras, 0, 1);
+ return start;
+ }
+
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, StubId::stubgen_unsafe_setmemory_id);
- address start = __ pc();
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
Register dest = c_rarg0, count = c_rarg1, value = c_rarg2;
Label tail;
+ {
UnsafeMemoryAccessMark umam(this, true, false);
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -2679,6 +3095,17 @@ class StubGenerator: public StubCodeGenerator {
__ bind(finished);
__ leave();
__ ret(lr);
+ // have to exit the block and destroy the UnsafeMemoryAccessMark
+ // in order to retrieve the handler end address
+ }
+
+ // install saved handler addresses in extras
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == extra_count,
+ "incorrect handlers count %d", extras.length());
+ // record the stub entry and end plus the extras
+ store_archive_data(stub_id, start, end, nullptr, &extras);
return start;
}
@@ -2686,33 +3113,45 @@ class StubGenerator: public StubCodeGenerator {
address generate_data_cache_writeback() {
const Register line = c_rarg0; // address of line to write back
- __ align(CodeEntryAlignment);
-
StubId stub_id = StubId::stubgen_data_cache_writeback_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
__ cache_wb(Address(line, 0));
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address generate_data_cache_writeback_sync() {
- const Register is_pre = c_rarg0; // pre or post sync
-
- __ align(CodeEntryAlignment);
-
StubId stub_id = StubId::stubgen_data_cache_writeback_sync_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ const Register is_pre = c_rarg0; // pre or post sync
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
// pre wbsync is a no-op
// post wbsync translates to an sfence
Label skip;
- address start = __ pc();
+ start = __ pc();
__ enter();
__ cbnz(is_pre, skip);
__ cache_wbsync(false);
@@ -2720,6 +3159,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2882,8 +3324,15 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg2 - sessionKe (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
- __ align(CodeEntryAlignment);
+ assert(UseAES, "need AES cryptographic extension support");
StubId stub_id = StubId::stubgen_aescrypt_encryptBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
const Register from = c_rarg0; // source array address
@@ -2891,7 +3340,7 @@ class StubGenerator: public StubCodeGenerator {
const Register key = c_rarg2; // key array address
const Register keylen = rscratch1;
- address start = __ pc();
+ start = __ pc();
__ enter();
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@@ -2904,6 +3353,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2916,8 +3368,14 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_aescrypt_decryptBlock() {
assert(UseAES, "need AES cryptographic extension support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_aescrypt_decryptBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label L_doLast;
@@ -2926,7 +3384,7 @@ class StubGenerator: public StubCodeGenerator {
const Register key = c_rarg2; // key array address
const Register keylen = rscratch1;
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@@ -2938,6 +3396,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2955,8 +3416,14 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES, "need AES cryptographic extension support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_encryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
@@ -2969,7 +3436,7 @@ class StubGenerator: public StubCodeGenerator {
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
const Register keylen = rscratch1;
- address start = __ pc();
+ start = __ pc();
__ enter();
@@ -3043,6 +3510,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3060,8 +3530,14 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES, "need AES cryptographic extension support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
@@ -3074,7 +3550,7 @@ class StubGenerator: public StubCodeGenerator {
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
const Register keylen = rscratch1;
- address start = __ pc();
+ start = __ pc();
__ enter();
@@ -3152,6 +3628,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3188,6 +3667,13 @@ class StubGenerator: public StubCodeGenerator {
// r0 - input length
//
address generate_counterMode_AESCrypt() {
+ StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
const Register in = c_rarg0;
const Register out = c_rarg1;
const Register key = c_rarg2;
@@ -3248,9 +3734,8 @@ class StubGenerator: public StubCodeGenerator {
// Wide bulk encryption of whole blocks.
__ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
StubCodeMark mark(this, stub_id);
- const address start = __ pc();
+ start = __ pc();
__ enter();
Label DONE, CTR_large_block, large_block_return;
@@ -3435,6 +3920,9 @@ class StubGenerator: public StubCodeGenerator {
__ strw(used, Address(used_ptr));
__ b(large_block_return);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3451,11 +3939,16 @@ class StubGenerator: public StubCodeGenerator {
// return - number of processed bytes
address generate_galoisCounterMode_AESCrypt() {
Label ghash_polynomial; // local data generated after code
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_galoisCounterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register in = c_rarg0;
@@ -3567,6 +4060,9 @@ class StubGenerator: public StubCodeGenerator {
// 128-bit vector
__ emit_int64(0x87);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3685,10 +4181,16 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -3815,6 +4317,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3838,11 +4343,16 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -3919,6 +4429,9 @@ class StubGenerator: public StubCodeGenerator {
__ emit_int32(0x8f1bbcdc);
__ emit_int32(0xca62c1d6);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3943,30 +4456,15 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
-
- static const uint32_t round_consts[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
- };
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
-
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -3987,7 +4485,7 @@ class StubGenerator: public StubCodeGenerator {
// t1 == v7
// load 16 keys to v16..v31
- __ lea(rscratch1, ExternalAddress((address)round_consts));
+ __ lea(rscratch1, ExternalAddress((address)_sha256_round_consts));
__ ld1(v16, v17, v18, v19, __ T4S, __ post(rscratch1, 64));
__ ld1(v20, v21, v22, v23, __ T4S, __ post(rscratch1, 64));
__ ld1(v24, v25, v26, v27, __ T4S, __ post(rscratch1, 64));
@@ -4048,6 +4546,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4099,41 +4600,15 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
-
- static const uint64_t round_consts[80] = {
- 0x428A2F98D728AE22L, 0x7137449123EF65CDL, 0xB5C0FBCFEC4D3B2FL,
- 0xE9B5DBA58189DBBCL, 0x3956C25BF348B538L, 0x59F111F1B605D019L,
- 0x923F82A4AF194F9BL, 0xAB1C5ED5DA6D8118L, 0xD807AA98A3030242L,
- 0x12835B0145706FBEL, 0x243185BE4EE4B28CL, 0x550C7DC3D5FFB4E2L,
- 0x72BE5D74F27B896FL, 0x80DEB1FE3B1696B1L, 0x9BDC06A725C71235L,
- 0xC19BF174CF692694L, 0xE49B69C19EF14AD2L, 0xEFBE4786384F25E3L,
- 0x0FC19DC68B8CD5B5L, 0x240CA1CC77AC9C65L, 0x2DE92C6F592B0275L,
- 0x4A7484AA6EA6E483L, 0x5CB0A9DCBD41FBD4L, 0x76F988DA831153B5L,
- 0x983E5152EE66DFABL, 0xA831C66D2DB43210L, 0xB00327C898FB213FL,
- 0xBF597FC7BEEF0EE4L, 0xC6E00BF33DA88FC2L, 0xD5A79147930AA725L,
- 0x06CA6351E003826FL, 0x142929670A0E6E70L, 0x27B70A8546D22FFCL,
- 0x2E1B21385C26C926L, 0x4D2C6DFC5AC42AEDL, 0x53380D139D95B3DFL,
- 0x650A73548BAF63DEL, 0x766A0ABB3C77B2A8L, 0x81C2C92E47EDAEE6L,
- 0x92722C851482353BL, 0xA2BFE8A14CF10364L, 0xA81A664BBC423001L,
- 0xC24B8B70D0F89791L, 0xC76C51A30654BE30L, 0xD192E819D6EF5218L,
- 0xD69906245565A910L, 0xF40E35855771202AL, 0x106AA07032BBD1B8L,
- 0x19A4C116B8D2D0C8L, 0x1E376C085141AB53L, 0x2748774CDF8EEB99L,
- 0x34B0BCB5E19B48A8L, 0x391C0CB3C5C95A63L, 0x4ED8AA4AE3418ACBL,
- 0x5B9CCA4F7763E373L, 0x682E6FF3D6B2B8A3L, 0x748F82EE5DEFB2FCL,
- 0x78A5636F43172F60L, 0x84C87814A1F0AB72L, 0x8CC702081A6439ECL,
- 0x90BEFFFA23631E28L, 0xA4506CEBDE82BDE9L, 0xBEF9A3F7B2C67915L,
- 0xC67178F2E372532BL, 0xCA273ECEEA26619CL, 0xD186B8C721C0C207L,
- 0xEADA7DD6CDE0EB1EL, 0xF57D4F7FEE6ED178L, 0x06F067AA72176FBAL,
- 0x0A637DC5A2C898A6L, 0x113F9804BEF90DAEL, 0x1B710B35131C471BL,
- 0x28DB77F523047D84L, 0x32CAAB7B40C72493L, 0x3C9EBE0A15C9BEBCL,
- 0x431D67C49C100D4CL, 0x4CC5D4BECB3E42B6L, 0x597F299CFC657E2AL,
- 0x5FCB6FAB3AD6FAECL, 0x6C44198C4A475817L
- };
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
-
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -4151,7 +4626,7 @@ class StubGenerator: public StubCodeGenerator {
__ ld1(v8, v9, v10, v11, __ T2D, state);
// load first 4 round constants
- __ lea(rscratch1, ExternalAddress((address)round_consts));
+ __ lea(rscratch1, ExternalAddress((address)_sha512_round_consts));
__ ld1(v20, v21, v22, v23, __ T2D, __ post(rscratch1, 64));
__ BIND(sha512_loop);
@@ -4236,6 +4711,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4349,22 +4827,15 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
-
- static const uint64_t round_consts[24] = {
- 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
- 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
- 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
- 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
- 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
- 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
- 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
- 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
- };
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
-
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -4396,7 +4867,7 @@ class StubGenerator: public StubCodeGenerator {
__ movw(rscratch2, 24);
// load round_constants base
- __ lea(rscratch1, ExternalAddress((address) round_consts));
+ __ lea(rscratch1, ExternalAddress((address) _sha3_round_consts));
// load input
__ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
@@ -4488,6 +4959,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4495,22 +4969,18 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg0 - long[] state0
// c_rarg1 - long[] state1
address generate_double_keccak() {
- static const uint64_t round_consts[24] = {
- 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
- 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
- 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
- 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
- 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
- 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
- 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
- 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
- };
-
+ StubId stub_id = StubId::stubgen_double_keccak_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
// Implements the double_keccak() method of the
// sun.secyrity.provider.SHA3Parallel class
__ align(CodeEntryAlignment);
- StubCodeMark mark(this, "StubRoutines", "double_keccak");
- address start = __ pc();
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
__ enter();
Register state0 = c_rarg0;
@@ -4546,7 +5016,7 @@ class StubGenerator: public StubCodeGenerator {
__ movw(rscratch2, 24);
// load round_constants base
- __ lea(rscratch1, ExternalAddress((address) round_consts));
+ __ lea(rscratch1, ExternalAddress((address) _double_keccak_round_consts));
__ BIND(rounds24_loop);
__ subw(rscratch2, rscratch2, 1);
@@ -4578,6 +5048,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4611,11 +5084,17 @@ class StubGenerator: public StubCodeGenerator {
// vectors write their first lane back to the keystream buffer, followed
// by the second lane from all vectors and so on.
address generate_chacha20Block_blockpar() {
+ StubId stub_id = StubId::stubgen_chacha20Block_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
Label L_twoRounds, L_cc20_const;
__ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_chacha20Block_id;
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
int i, j;
@@ -4770,6 +5249,9 @@ class StubGenerator: public StubCodeGenerator {
__ emit_int64(0x0605040702010003UL);
__ emit_int64(0x0E0D0C0F0A09080BUL);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -5258,11 +5740,16 @@ class StubGenerator: public StubCodeGenerator {
// coeffs (short[256]) = c_rarg0
// ntt_zetas (short[256]) = c_rarg1
address generate_kyberNtt() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -5486,6 +5973,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -5496,11 +5986,16 @@ class StubGenerator: public StubCodeGenerator {
// coeffs (short[256]) = c_rarg0
// ntt_zetas (short[256]) = c_rarg1
address generate_kyberInverseNtt() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberInverseNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -5770,6 +6265,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -5783,11 +6281,16 @@ class StubGenerator: public StubCodeGenerator {
// nttb (short[256]) = c_rarg2
// zetas (short[128]) = c_rarg3
address generate_kyberNttMult() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberNttMult_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -5889,6 +6392,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -5900,11 +6406,16 @@ class StubGenerator: public StubCodeGenerator {
// a (short[256]) = c_rarg1
// b (short[256]) = c_rarg2
address generate_kyberAddPoly_2() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberAddPoly_2_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -5973,6 +6484,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -5985,11 +6499,16 @@ class StubGenerator: public StubCodeGenerator {
// b (short[256]) = c_rarg2
// c (short[256]) = c_rarg3
address generate_kyberAddPoly_3() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberAddPoly_3_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -6072,6 +6591,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6081,19 +6603,29 @@ class StubGenerator: public StubCodeGenerator {
// static int implKyber12To16(
// byte[] condensed, int index, short[] parsed, int parsedLength) {}
//
- // (parsedLength or (parsedLength - 48) must be divisible by 64.)
+ // we assume that parsed and condensed are allocated such that for
+ // n = (parsedLength + 63) / 64
+ // n blocks of 96 bytes of input can be processed, i.e.
+ // index + n * 96 <= condensed.length and
+ // n * 64 <= parsed.length
//
// condensed (byte[]) = c_rarg0
// condensedIndex = c_rarg1
- // parsed (short[112 or 256]) = c_rarg2
- // parsedLength (112 or 256) = c_rarg3
+ // parsed (short[]) = c_rarg2
+ // parsedLength = c_rarg3
address generate_kyber12To16() {
- Label L_F00, L_loop, L_end;
+ StubId stub_id = StubId::stubgen_kyber12To16_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ Label L_F00, L_loop;
__ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_kyber12To16_id;
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register condensed = c_rarg0;
@@ -6209,75 +6741,8 @@ class StubGenerator: public StubCodeGenerator {
vs_st2_post(vs_front(vb), __ T8H, parsed);
__ sub(parsedLength, parsedLength, 64);
- __ cmp(parsedLength, (u1)64);
- __ br(Assembler::GE, L_loop);
- __ cbz(parsedLength, L_end);
-
- // if anything is left it should be a final 72 bytes of input
- // i.e. a final 48 12-bit values. so we handle this by loading
- // 48 bytes into all 16B lanes of front(vin) and only 24
- // bytes into the lower 8B lane of back(vin)
- vs_ld3_post(vs_front(vin), __ T16B, condensed);
- vs_ld3(vs_back(vin), __ T8B, condensed);
-
- // Expand vin[0] into va[0:1], and vin[1] into va[2:3] and va[4:5]
- // n.b. target elements 2 and 3 of va duplicate elements 4 and
- // 5 and target element 2 of vb duplicates element 4.
- __ ushll(va[0], __ T8H, vin[0], __ T8B, 0);
- __ ushll2(va[1], __ T8H, vin[0], __ T16B, 0);
- __ ushll(va[2], __ T8H, vin[1], __ T8B, 0);
- __ ushll2(va[3], __ T8H, vin[1], __ T16B, 0);
- __ ushll(va[4], __ T8H, vin[1], __ T8B, 0);
- __ ushll2(va[5], __ T8H, vin[1], __ T16B, 0);
-
- // This time expand just the lower 8 lanes
- __ ushll(vb[0], __ T8H, vin[3], __ T8B, 0);
- __ ushll(vb[2], __ T8H, vin[4], __ T8B, 0);
- __ ushll(vb[4], __ T8H, vin[4], __ T8B, 0);
-
- // shift lo byte of copy 1 of the middle stripe into the high byte
- __ shl(va[2], __ T8H, va[2], 8);
- __ shl(va[3], __ T8H, va[3], 8);
- __ shl(vb[2], __ T8H, vb[2], 8);
-
- // expand vin[2] into va[6:7] and lower 8 lanes of vin[5] into
- // vb[6] pre-shifted by 4 to ensure top bits of the input 12-bit
- // int are in bit positions [4..11].
- __ ushll(va[6], __ T8H, vin[2], __ T8B, 4);
- __ ushll2(va[7], __ T8H, vin[2], __ T16B, 4);
- __ ushll(vb[6], __ T8H, vin[5], __ T8B, 4);
-
- // mask hi 4 bits of each 1st 12-bit int in pair from copy1 and
- // shift lo 4 bits of each 2nd 12-bit int in pair to bottom of
- // copy2
- __ andr(va[2], __ T16B, va[2], v31);
- __ andr(va[3], __ T16B, va[3], v31);
- __ ushr(va[4], __ T8H, va[4], 4);
- __ ushr(va[5], __ T8H, va[5], 4);
- __ andr(vb[2], __ T16B, vb[2], v31);
- __ ushr(vb[4], __ T8H, vb[4], 4);
-
-
-
- // sum hi 4 bits and lo 8 bits of each 1st 12-bit int in pair and
- // hi 8 bits plus lo 4 bits of each 2nd 12-bit int in pair
-
- // n.b. ordering ensures: i) inputs are consumed before they are
- // overwritten ii) order of 16-bit results across succsessive
- // pairs of vectors in va and then lower half of vb reflects order
- // of corresponding 12-bit inputs
- __ addv(va[0], __ T8H, va[0], va[2]);
- __ addv(va[2], __ T8H, va[1], va[3]);
- __ addv(va[1], __ T8H, va[4], va[6]);
- __ addv(va[3], __ T8H, va[5], va[7]);
- __ addv(vb[0], __ T8H, vb[0], vb[2]);
- __ addv(vb[1], __ T8H, vb[4], vb[6]);
-
- // store 48 results interleaved as shorts
- vs_st2_post(vs_front(va), __ T8H, parsed);
- vs_st2_post(vs_front(vs_front(vb)), __ T8H, parsed);
-
- __ BIND(L_end);
+ __ cmp(parsedLength, (u1)0);
+ __ br(Assembler::GT, L_loop);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ mov(r0, zr); // return 0
@@ -6288,6 +6753,9 @@ class StubGenerator: public StubCodeGenerator {
__ emit_int64(0x0f000f000f000f00);
__ emit_int64(0x0f000f000f000f00);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6297,11 +6765,16 @@ class StubGenerator: public StubCodeGenerator {
//
// coeffs (short[256]) = c_rarg0
address generate_kyberBarrettReduce() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberBarrettReduce_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -6381,6 +6854,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6544,11 +7020,16 @@ class StubGenerator: public StubCodeGenerator {
// coeffs (int[256]) = c_rarg0
// zetas (int[256]) = c_rarg1
address generate_dilithiumAlmostNtt() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -6659,6 +7140,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6751,11 +7235,16 @@ class StubGenerator: public StubCodeGenerator {
// coeffs (int[256]) = c_rarg0
// zetas (int[256]) = c_rarg1
address generate_dilithiumAlmostInverseNtt() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostInverseNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -6851,6 +7340,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6864,11 +7356,16 @@ class StubGenerator: public StubCodeGenerator {
// poly1 (int[256]) = c_rarg1
// poly2 (int[256]) = c_rarg2
address generate_dilithiumNttMult() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumNttMult_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
Label L_loop;
@@ -6917,6 +7414,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6928,11 +7428,16 @@ class StubGenerator: public StubCodeGenerator {
// coeffs (int[256]) = c_rarg0
// constant (int) = c_rarg1
address generate_dilithiumMontMulByConstant() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumMontMulByConstant_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
Label L_loop;
@@ -6978,6 +7483,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -6992,11 +7500,16 @@ class StubGenerator: public StubCodeGenerator {
// twoGamma2 (int) = c_rarg3
// multiplier (int) = c_rarg4
address generate_dilithiumDecomposePoly() {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumDecomposePoly_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_loop;
const Register input = c_rarg0;
@@ -7136,6 +7649,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(r0, zr); // return 0
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7275,21 +7791,15 @@ class StubGenerator: public StubCodeGenerator {
default:
ShouldNotReachHere();
}
-
- static const uint64_t round_consts[24] = {
- 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
- 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
- 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
- 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
- 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
- 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
- 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
- 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
- };
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -7441,7 +7951,7 @@ class StubGenerator: public StubCodeGenerator {
__ fmovs(v1, 1.0); // exact representation
__ str(buf, Address(sp, 16));
- __ lea(tmp3, ExternalAddress((address) round_consts));
+ __ lea(tmp3, ExternalAddress((address) _sha3_round_consts));
__ BIND(loop_body);
keccak_round_gpr(can_use_fp, can_use_r18, tmp3,
@@ -7496,6 +8006,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7512,12 +8025,17 @@ class StubGenerator: public StubCodeGenerator {
*/
address generate_updateBytesCRC32() {
assert(UseCRC32Intrinsics, "what are we doing here?");
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesCRC32_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
@@ -7537,6 +8055,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7554,12 +8075,17 @@ class StubGenerator: public StubCodeGenerator {
*/
address generate_updateBytesCRC32C() {
assert(UseCRC32CIntrinsics, "what are we doing here?");
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesCRC32C_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
@@ -7579,6 +8105,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7594,10 +8123,16 @@ class StubGenerator: public StubCodeGenerator {
* c_rarg0 - int adler result
*/
address generate_updateBytesAdler32() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesAdler32_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1;
@@ -7765,6 +8300,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7816,11 +8354,17 @@ class StubGenerator: public StubCodeGenerator {
* c_rarg4 - z address
*/
address generate_multiplyToLen() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_multiplyToLen_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register x = r0;
const Register xlen = r1;
const Register y = r2;
@@ -7842,6 +8386,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7849,10 +8396,16 @@ class StubGenerator: public StubCodeGenerator {
// squareToLen algorithm for sizes 1..127 described in java code works
// faster than multiply_to_len on some CPUs and slower on others, but
// multiply_to_len shows a bit better overall results
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_squareToLen_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register x = r0;
const Register xlen = r1;
@@ -7879,15 +8432,25 @@ class StubGenerator: public StubCodeGenerator {
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address generate_mulAdd() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_mulAdd_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register out = r0;
const Register in = r1;
@@ -7901,6 +8464,9 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -7914,10 +8480,16 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - numIter
//
address generate_bigIntegerRightShift() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_bigIntegerRightShiftWorker_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
@@ -8024,6 +8596,9 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(Exit);
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -8037,10 +8612,16 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - numIter
//
address generate_bigIntegerLeftShift() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_bigIntegerLeftShiftWorker_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
@@ -8135,10 +8716,25 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(Exit);
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address generate_count_positives(address &count_positives_long) {
+ StubId stub_id = StubId::stubgen_count_positives_id;
+ GrowableArray<address> entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ // We have an extra entry for count_positives_long.
+ assert(entry_count == 2, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == 1,
+ "unexpected extra entry count %d", entries.length());
+ count_positives_long = entries.at(0);
+ return start;
+ }
const u1 large_loop_size = 64;
const uint64_t UPPER_BIT_MASK=0x8080808080808080;
int dcache_line = VM_Version::dcache_line_size();
@@ -8146,8 +8742,6 @@ class StubGenerator: public StubCodeGenerator {
Register ary1 = r1, len = r2, result = r0;
__ align(CodeEntryAlignment);
-
- StubId stub_id = StubId::stubgen_count_positives_id;
StubCodeMark mark(this, stub_id);
address entry = __ pc();
@@ -8190,6 +8784,7 @@ class StubGenerator: public StubCodeGenerator {
const RegSet spilled_regs = RegSet::range(tmp1, tmp5) + tmp6;
count_positives_long = __ pc(); // 2nd entry point
+ entries.append(count_positives_long);
__ enter();
@@ -8304,6 +8899,9 @@ class StubGenerator: public StubCodeGenerator {
__ sub(result, result, len);
__ ret(lr);
+ // record the stub entry and end plus the extra entry
+ store_archive_data(stub_id, entry, __ pc(), &entries);
+
return entry;
}
@@ -8394,6 +8992,13 @@ class StubGenerator: public StubCodeGenerator {
// r3-r5 are reserved temporary registers
// Clobbers: v0-v7 when UseSIMDForArrayEquals, rscratch1, rscratch2
address generate_large_array_equals() {
+ StubId stub_id = StubId::stubgen_large_array_equals_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
tmp7 = r12, tmp8 = r13;
@@ -8409,7 +9014,6 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_large_array_equals_id;
StubCodeMark mark(this, stub_id);
address entry = __ pc();
@@ -8484,6 +9088,10 @@ class StubGenerator: public StubCodeGenerator {
__ bind(NOT_EQUAL_NO_POP);
__ leave();
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -8492,6 +9100,33 @@ class StubGenerator: public StubCodeGenerator {
// cnt = r2 - elements count
// Clobbers: v0-v13, rscratch1, rscratch2
address generate_large_arrays_hashcode(BasicType eltype) {
+ StubId stub_id;
+ switch (eltype) {
+ case T_BOOLEAN:
+ stub_id = StubId::stubgen_large_arrays_hashcode_boolean_id;
+ break;
+ case T_BYTE:
+ stub_id = StubId::stubgen_large_arrays_hashcode_byte_id;
+ break;
+ case T_CHAR:
+ stub_id = StubId::stubgen_large_arrays_hashcode_char_id;
+ break;
+ case T_SHORT:
+ stub_id = StubId::stubgen_large_arrays_hashcode_short_id;
+ break;
+ case T_INT:
+ stub_id = StubId::stubgen_large_arrays_hashcode_int_id;
+ break;
+ default:
+ stub_id = StubId::NO_STUBID;
+ ShouldNotReachHere();
+ };
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
const Register result = r0, ary = r1, cnt = r2;
const FloatRegister vdata0 = v3, vdata1 = v2, vdata2 = v1, vdata3 = v0;
const FloatRegister vmul0 = v4, vmul1 = v5, vmul2 = v6, vmul3 = v7;
@@ -8535,28 +9170,6 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
- StubId stub_id;
- switch (eltype) {
- case T_BOOLEAN:
- stub_id = StubId::stubgen_large_arrays_hashcode_boolean_id;
- break;
- case T_BYTE:
- stub_id = StubId::stubgen_large_arrays_hashcode_byte_id;
- break;
- case T_CHAR:
- stub_id = StubId::stubgen_large_arrays_hashcode_char_id;
- break;
- case T_SHORT:
- stub_id = StubId::stubgen_large_arrays_hashcode_short_id;
- break;
- case T_INT:
- stub_id = StubId::stubgen_large_arrays_hashcode_int_id;
- break;
- default:
- stub_id = StubId::NO_STUBID;
- ShouldNotReachHere();
- };
-
StubCodeMark mark(this, stub_id);
address entry = __ pc();
@@ -8791,19 +9404,32 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
address generate_dsin_dcos(bool isCos) {
- __ align(CodeEntryAlignment);
StubId stub_id = (isCos ? StubId::stubgen_dcos_id : StubId::stubgen_dsin_id);
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ generate_dsin_dcos(isCos, (address)StubRoutines::aarch64::_npio2_hw,
(address)StubRoutines::aarch64::_two_over_pi,
(address)StubRoutines::aarch64::_pio2,
(address)StubRoutines::aarch64::_dsin_coef,
(address)StubRoutines::aarch64::_dcos_coef);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -8847,8 +9473,14 @@ class StubGenerator: public StubCodeGenerator {
// r10 = tmp1
// r11 = tmp2
address generate_compare_long_string_different_encoding(bool isLU) {
- __ align(CodeEntryAlignment);
StubId stub_id = (isLU ? StubId::stubgen_compare_long_string_LU_id : StubId::stubgen_compare_long_string_UL_id);
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
@@ -8950,20 +9582,34 @@ class StubGenerator: public StubCodeGenerator {
__ subw(result, tmp1, rscratch1);
__ bind(DONE);
__ ret(lr);
- return entry;
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
+ return entry;
}
// r0 = input (float16)
// v0 = result (float)
// v1 = temporary float register
address generate_float16ToFloat() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_hf2f_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
BLOCK_COMMENT("Entry:");
__ flt16_to_flt(v0, r0, v1);
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -8971,24 +9617,40 @@ class StubGenerator: public StubCodeGenerator {
// r0 = result (float16)
// v1 = temporary float register
address generate_floatToFloat16() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_f2hf_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
BLOCK_COMMENT("Entry:");
__ flt_to_flt16(r0, v0, v1);
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
address generate_method_entry_barrier() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_method_entry_barrier_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label deoptimize_label;
- address start = __ pc();
+ start = __ pc();
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
@@ -9037,6 +9699,9 @@ class StubGenerator: public StubCodeGenerator {
__ mov(sp, rscratch1);
__ br(rscratch2);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -9048,8 +9713,14 @@ class StubGenerator: public StubCodeGenerator {
// r10 = tmp1
// r11 = tmp2
address generate_compare_long_string_same_encoding(bool isLL) {
- __ align(CodeEntryAlignment);
StubId stub_id = (isLL ? StubId::stubgen_compare_long_string_LL_id : StubId::stubgen_compare_long_string_UU_id);
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
@@ -9157,6 +9828,10 @@ class StubGenerator: public StubCodeGenerator {
__ bind(LENGTH_DIFF);
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -9188,8 +9863,14 @@ class StubGenerator: public StubCodeGenerator {
case UU: stub_id = StubId::stubgen_compare_long_string_UU_id; break;
default: ShouldNotReachHere();
}
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
+ StubCodeMark mark(this, stub_id);
address entry = __ pc();
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
tmp1 = r10, tmp2 = r11;
@@ -9224,8 +9905,6 @@ class StubGenerator: public StubCodeGenerator {
ShouldNotReachHere(); \
}
- StubCodeMark mark(this, stub_id);
-
__ mov(idx, 0);
__ sve_whilelt(pgtmp1, mode == LL ? __ B : __ H, idx, cnt);
@@ -9269,6 +9948,10 @@ class StubGenerator: public StubCodeGenerator {
__ bind(DONE);
__ ret(lr);
#undef LOAD_PAIR
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -9330,6 +10013,12 @@ class StubGenerator: public StubCodeGenerator {
stub_id = StubId::stubgen_string_indexof_linear_uu_id;
}
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
@@ -9598,6 +10287,10 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(DONE);
__ pop(spilled_regs, sp);
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -9628,8 +10321,14 @@ class StubGenerator: public StubCodeGenerator {
// v1 = loaded 8 bytes
// Clobbers: r0, r1, r3, rscratch1, rflags, v0-v6
address generate_large_byte_array_inflate() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_large_byte_array_inflate_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
address entry = __ pc();
Label LOOP, LOOP_START, LOOP_PRFM, LOOP_PRFM_START, DONE;
@@ -9668,6 +10367,10 @@ class StubGenerator: public StubCodeGenerator {
__ br(__ GE, LOOP);
__ bind(DONE);
__ ret(lr);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, entry, __ pc());
+
return entry;
}
@@ -9683,7 +10386,7 @@ class StubGenerator: public StubCodeGenerator {
* Output:
* Updated state at c_rarg0
*/
- address generate_ghash_processBlocks() {
+ address generate_ghash_processBlocks_small() {
// Bafflingly, GCM uses little-endian for the byte order, but
// big-endian for the bit order. For example, the polynomial 1 is
// represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
@@ -9695,11 +10398,17 @@ class StubGenerator: public StubCodeGenerator {
// that) and keep the data in little-endian bit order through the
// calculation, bit-reversing the inputs and outputs.
- StubId stub_id = StubId::stubgen_ghash_processBlocks_id;
+ StubId stub_id = StubId::stubgen_ghash_processBlocks_small_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label polynomial; // local data generated at end of stub
- __ align(CodeEntryAlignment);
- address start = __ pc();
+ start = __ pc();
Register state = c_rarg0;
Register subkeyH = c_rarg1;
@@ -9759,17 +10468,24 @@ class StubGenerator: public StubCodeGenerator {
// 128-bit vector
__ emit_int64(0x87);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
- address generate_ghash_processBlocks_wide() {
- address small = generate_ghash_processBlocks();
-
- StubId stub_id = StubId::stubgen_ghash_processBlocks_wide_id;
- StubCodeMark mark(this, stub_id);
+ address generate_ghash_processBlocks(address small) {
+ StubId stub_id = StubId::stubgen_ghash_processBlocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
Label polynomial; // local data generated after stub
__ align(CodeEntryAlignment);
- address start = __ pc();
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
Register state = c_rarg0;
Register subkeyH = c_rarg1;
@@ -9811,8 +10527,10 @@ class StubGenerator: public StubCodeGenerator {
// 128-bit vector
__ emit_int64(0x87);
- return start;
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+ return start;
}
void generate_base64_encode_simdround(Register src, Register dst,
@@ -9863,26 +10581,16 @@ class StubGenerator: public StubCodeGenerator {
*/
address generate_base64_encodeBlock() {
- static const char toBase64[64] = {
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
- 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
- };
-
- static const char toBase64URL[64] = {
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
- 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
- };
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_base64_encodeBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register src = c_rarg0; // source array
Register soff = c_rarg1; // source start offset
@@ -9902,9 +10610,9 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, send, soff);
// load the codec base address
- __ lea(codec, ExternalAddress((address) toBase64));
+ __ lea(codec, ExternalAddress((address) _encodeBlock_toBase64));
__ cbz(isURL, ProcessData);
- __ lea(codec, ExternalAddress((address) toBase64URL));
+ __ lea(codec, ExternalAddress((address) _encodeBlock_toBase64URL));
__ BIND(ProcessData);
@@ -9957,6 +10665,9 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(Exit);
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10078,80 +10789,16 @@ class StubGenerator: public StubCodeGenerator {
// on http://0x80.pl/articles/base64-simd-neon.html#encoding-quadwords, in section
// titled "Base64 decoding".
- // Non-SIMD lookup tables are mostly dumped from fromBase64 array used in java.util.Base64,
- // except the trailing character '=' is also treated illegal value in this intrinsic. That
- // is java.util.Base64.fromBase64['='] = -2, while fromBase(URL)64ForNoSIMD['='] = 255 here.
- static const uint8_t fromBase64ForNoSIMD[256] = {
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u, 255u, 63u,
- 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u,
- 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u, 255u,
- 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u, 40u,
- 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- };
-
- static const uint8_t fromBase64URLForNoSIMD[256] = {
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u,
- 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u,
- 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u, 63u,
- 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u, 40u,
- 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- };
-
- // A legal value of base64 code is in range [0, 127]. We need two lookups
- // with tbl/tbx and combine them to get the decode data. The 1st table vector
- // lookup use tbl, out of range indices are set to 0 in destination. The 2nd
- // table vector lookup use tbx, out of range indices are unchanged in
- // destination. Input [64..126] is mapped to index [65, 127] in second lookup.
- // The value of index 64 is set to 0, so that we know that we already get the
- // decoded data with the 1st lookup.
- static const uint8_t fromBase64ForSIMD[128] = {
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u, 255u, 63u,
- 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
- 0u, 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u,
- 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u,
- 255u, 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u,
- 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u,
- };
-
- static const uint8_t fromBase64URLForSIMD[128] = {
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u,
- 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 255u, 62u, 255u, 255u,
- 52u, 53u, 54u, 55u, 56u, 57u, 58u, 59u, 60u, 61u, 255u, 255u, 255u, 255u, 255u, 255u,
- 0u, 255u, 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u,
- 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 255u, 255u, 255u, 255u,
- 63u, 255u, 26u, 27u, 28u, 29u, 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u, 38u, 39u,
- 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u, 48u, 49u, 50u, 51u, 255u, 255u, 255u, 255u,
- };
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_base64_decodeBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register src = c_rarg0; // source array
Register soff = c_rarg1; // source start offset
@@ -10178,9 +10825,9 @@ class StubGenerator: public StubCodeGenerator {
__ sub(length, send, soff);
__ bfm(length, zr, 0, 1);
- __ lea(nosimd_codec, ExternalAddress((address) fromBase64ForNoSIMD));
+ __ lea(nosimd_codec, ExternalAddress((address) _decodeBlock_fromBase64ForNoSIMD));
__ cbz(isURL, ProcessData);
- __ lea(nosimd_codec, ExternalAddress((address) fromBase64URLForNoSIMD));
+ __ lea(nosimd_codec, ExternalAddress((address) _decodeBlock_fromBase64URLForNoSIMD));
__ BIND(ProcessData);
__ mov(rscratch1, length);
@@ -10225,9 +10872,9 @@ class StubGenerator: public StubCodeGenerator {
__ cbzw(rscratch1, Exit);
__ sub(length, length, 80);
- __ lea(simd_codec, ExternalAddress((address) fromBase64ForSIMD));
+ __ lea(simd_codec, ExternalAddress((address) _decodeBlock_fromBase64ForSIMD));
__ cbz(isURL, SIMDEnter);
- __ lea(simd_codec, ExternalAddress((address) fromBase64URLForSIMD));
+ __ lea(simd_codec, ExternalAddress((address) _decodeBlock_fromBase64URLForSIMD));
__ BIND(SIMDEnter);
__ ld1(v0, v1, v2, v3, __ T16B, __ post(simd_codec, 64));
@@ -10260,24 +10907,50 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// Support for spin waits.
address generate_spin_wait() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_spin_wait_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ spin_wait();
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
void generate_lookup_secondary_supers_table_stub() {
StubId stub_id = StubId::stubgen_lookup_secondary_supers_table_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == Klass::SECONDARY_SUPERS_TABLE_SIZE, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == Klass::SECONDARY_SUPERS_TABLE_SIZE - 1,
+ "unexpected extra entry count %d", entries.length());
+ StubRoutines::_lookup_secondary_supers_table_stubs[0] = start;
+ for (int slot = 1; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
+ StubRoutines::_lookup_secondary_supers_table_stubs[slot] = entries.at(slot - 1);
+ }
+ return;
+ }
+
StubCodeMark mark(this, stub_id);
const Register
@@ -10292,7 +10965,13 @@ class StubGenerator: public StubCodeGenerator {
vtemp = v0;
for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
- StubRoutines::_lookup_secondary_supers_table_stubs[slot] = __ pc();
+ address next_entry = __ pc();
+ StubRoutines::_lookup_secondary_supers_table_stubs[slot] = next_entry;
+ if (slot == 0) {
+ start = next_entry;
+ } else {
+ entries.append(next_entry);
+ }
Label L_success;
__ enter();
__ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass,
@@ -10302,14 +10981,21 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
}
+ // record the stub entry and end plus all the auxiliary entries
+ store_archive_data(stub_id, start, __ pc(), &entries);
}
// Slow path implementation for UseSecondarySupersTable.
address generate_lookup_secondary_supers_table_slow_path_stub() {
StubId stub_id = StubId::stubgen_lookup_secondary_supers_table_slow_path_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
-
- address start = __ pc();
+ start = __ pc();
const Register
r_super_klass = r0, // argument
r_array_base = r1, // argument
@@ -10321,6 +11007,9 @@ class StubGenerator: public StubCodeGenerator {
__ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result);
__ ret(lr);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10460,14 +11149,43 @@ class StubGenerator: public StubCodeGenerator {
if (! UseLSE) {
return;
}
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_atomic_entry_points_id;
- StubCodeMark mark(this, stub_id);
- address first_entry = __ pc();
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == entry_count - 1,
+ "unexpected extra entry count %d", entries.length());
+ aarch64_atomic_fetch_add_4_impl = (aarch64_atomic_stub_t)start;
+ int idx = 0;
+ aarch64_atomic_fetch_add_8_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_fetch_add_4_relaxed_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_fetch_add_8_relaxed_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_xchg_4_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_xchg_8_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_1_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_4_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_8_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_1_relaxed_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_4_relaxed_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_8_relaxed_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_4_release_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_8_release_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_4_seq_cst_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ aarch64_atomic_cmpxchg_8_seq_cst_impl = (aarch64_atomic_stub_t)entries.at(idx++);
+ assert(idx == entries.length(), "sanity!");
+ return;
+ }
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
+ address end;
+ {
// ADD, memory_order_conservative
AtomicStubMark mark_fetch_add_4(_masm, &aarch64_atomic_fetch_add_4_impl);
gen_ldadd_entry(Assembler::word, memory_order_conservative);
+
AtomicStubMark mark_fetch_add_8(_masm, &aarch64_atomic_fetch_add_8_impl);
gen_ldadd_entry(Assembler::xword, memory_order_conservative);
@@ -10475,6 +11193,7 @@ class StubGenerator: public StubCodeGenerator {
AtomicStubMark mark_fetch_add_4_relaxed
(_masm, &aarch64_atomic_fetch_add_4_relaxed_impl);
gen_ldadd_entry(MacroAssembler::word, memory_order_relaxed);
+
AtomicStubMark mark_fetch_add_8_relaxed
(_masm, &aarch64_atomic_fetch_add_8_relaxed_impl);
gen_ldadd_entry(MacroAssembler::xword, memory_order_relaxed);
@@ -10482,14 +11201,17 @@ class StubGenerator: public StubCodeGenerator {
// XCHG, memory_order_conservative
AtomicStubMark mark_xchg_4(_masm, &aarch64_atomic_xchg_4_impl);
gen_swpal_entry(Assembler::word);
- AtomicStubMark mark_xchg_8_impl(_masm, &aarch64_atomic_xchg_8_impl);
+
+ AtomicStubMark mark_xchg_8(_masm, &aarch64_atomic_xchg_8_impl);
gen_swpal_entry(Assembler::xword);
// CAS, memory_order_conservative
AtomicStubMark mark_cmpxchg_1(_masm, &aarch64_atomic_cmpxchg_1_impl);
gen_cas_entry(MacroAssembler::byte, memory_order_conservative);
+
AtomicStubMark mark_cmpxchg_4(_masm, &aarch64_atomic_cmpxchg_4_impl);
gen_cas_entry(MacroAssembler::word, memory_order_conservative);
+
AtomicStubMark mark_cmpxchg_8(_masm, &aarch64_atomic_cmpxchg_8_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_conservative);
@@ -10497,9 +11219,11 @@ class StubGenerator: public StubCodeGenerator {
AtomicStubMark mark_cmpxchg_1_relaxed
(_masm, &aarch64_atomic_cmpxchg_1_relaxed_impl);
gen_cas_entry(MacroAssembler::byte, memory_order_relaxed);
+
AtomicStubMark mark_cmpxchg_4_relaxed
(_masm, &aarch64_atomic_cmpxchg_4_relaxed_impl);
gen_cas_entry(MacroAssembler::word, memory_order_relaxed);
+
AtomicStubMark mark_cmpxchg_8_relaxed
(_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_relaxed);
@@ -10507,6 +11231,7 @@ class StubGenerator: public StubCodeGenerator {
AtomicStubMark mark_cmpxchg_4_release
(_masm, &aarch64_atomic_cmpxchg_4_release_impl);
gen_cas_entry(MacroAssembler::word, memory_order_release);
+
AtomicStubMark mark_cmpxchg_8_release
(_masm, &aarch64_atomic_cmpxchg_8_release_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_release);
@@ -10514,11 +11239,41 @@ class StubGenerator: public StubCodeGenerator {
AtomicStubMark mark_cmpxchg_4_seq_cst
(_masm, &aarch64_atomic_cmpxchg_4_seq_cst_impl);
gen_cas_entry(MacroAssembler::word, memory_order_seq_cst);
+
AtomicStubMark mark_cmpxchg_8_seq_cst
(_masm, &aarch64_atomic_cmpxchg_8_seq_cst_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_seq_cst);
- ICache::invalidate_range(first_entry, __ pc() - first_entry);
+ end = __ pc();
+
+ ICache::invalidate_range(start, end - start);
+ // exit block to force update of AtomicStubMark targets
+ }
+
+ assert(start == (address)aarch64_atomic_fetch_add_4_impl,
+ "atomic stub should be at start of buffer");
+ // record the stub start and end plus all the entries saved by the
+ // AtomicStubMark destructor
+ entries.append((address)aarch64_atomic_fetch_add_8_impl);
+ entries.append((address)aarch64_atomic_fetch_add_4_relaxed_impl);
+ entries.append((address)aarch64_atomic_fetch_add_8_relaxed_impl);
+ entries.append((address)aarch64_atomic_xchg_4_impl);
+ entries.append((address)aarch64_atomic_xchg_8_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_1_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_4_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_8_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_1_relaxed_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_4_relaxed_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_8_relaxed_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_4_release_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_8_release_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_4_seq_cst_impl);
+ entries.append((address)aarch64_atomic_cmpxchg_8_seq_cst_impl);
+
+ assert(entries.length() == entry_count - 1,
+ "unexpected extra entry count %d", entries.length());
+
+ store_archive_data(stub_id, start, end, &entries);
}
#endif // LINUX
@@ -10622,9 +11377,19 @@ class StubGenerator: public StubCodeGenerator {
if (!Continuations::enabled()) return nullptr;
StubId stub_id = StubId::stubgen_cont_thaw_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
generate_cont_thaw(Continuation::thaw_top);
+
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10633,11 +11398,20 @@ class StubGenerator: public StubCodeGenerator {
// TODO: will probably need multiple return barriers depending on return type
StubId stub_id = StubId::stubgen_cont_returnBarrier_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
generate_cont_thaw(Continuation::thaw_return_barrier);
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10645,19 +11419,34 @@ class StubGenerator: public StubCodeGenerator {
if (!Continuations::enabled()) return nullptr;
StubId stub_id = StubId::stubgen_cont_returnBarrierExc_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
generate_cont_thaw(Continuation::thaw_return_barrier_exception);
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address generate_cont_preempt_stub() {
if (!Continuations::enabled()) return nullptr;
StubId stub_id = StubId::stubgen_cont_preempt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ reset_last_Java_frame(true);
@@ -10682,6 +11471,9 @@ class StubGenerator: public StubCodeGenerator {
__ ldr(rscratch1, Address(rscratch1));
__ br(rscratch1);
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10737,10 +11529,16 @@ class StubGenerator: public StubCodeGenerator {
// computation.
address generate_poly1305_processBlocks() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_poly1305_processBlocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label here;
__ enter();
RegSet callee_saved = RegSet::range(r19, r28);
@@ -10848,14 +11646,23 @@ class StubGenerator: public StubCodeGenerator {
__ leave();
__ ret(lr);
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// exception handler for upcall stubs
address generate_upcall_stub_exception_handler() {
StubId stub_id = StubId::stubgen_upcall_stub_exception_handler_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Native caller has no idea how to handle exceptions,
// so we just crash here. Up to callee to catch exceptions.
@@ -10864,6 +11671,9 @@ class StubGenerator: public StubCodeGenerator {
__ blr(rscratch1);
__ should_not_reach_here();
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -10872,8 +11682,14 @@ class StubGenerator: public StubCodeGenerator {
// rmethod = result
address generate_upcall_stub_load_target() {
StubId stub_id = StubId::stubgen_upcall_stub_load_target_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ resolve_global_jobject(j_rarg0, rscratch1, rscratch2);
// Load target method from receiver
@@ -10887,6 +11703,9 @@ class StubGenerator: public StubCodeGenerator {
__ ret(lr);
+ // record the stub start and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -11286,8 +12105,6 @@ class StubGenerator: public StubCodeGenerator {
*/
address generate_multiply() {
Label argh, nothing;
- bind(argh);
- stop("MontgomeryMultiply total_allocation must be <= 8192");
align(CodeEntryAlignment);
address entry = pc();
@@ -11394,6 +12211,10 @@ class StubGenerator: public StubCodeGenerator {
bind(nothing);
ret(lr);
+ // handler for error case
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
return entry;
}
// In C, approximately:
@@ -11497,8 +12318,6 @@ class StubGenerator: public StubCodeGenerator {
*/
address generate_square() {
Label argh;
- bind(argh);
- stop("MontgomeryMultiply total_allocation must be <= 8192");
align(CodeEntryAlignment);
address entry = pc();
@@ -11607,6 +12426,10 @@ class StubGenerator: public StubCodeGenerator {
leave();
ret(lr);
+ // handler for error case
+ bind(argh);
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
+
return entry;
}
// In C, approximately:
@@ -11805,7 +12628,9 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
- StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory();
+ if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_setMemory)) {
+ StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory();
+ }
StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated
}
@@ -11814,7 +12639,7 @@ class StubGenerator: public StubCodeGenerator {
#if COMPILER2_OR_JVMCI
if (UseSVE == 0) {
- StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices(StubId::stubgen_vector_iota_indices_id);
+ generate_iota_indices(StubId::stubgen_vector_iota_indices_id);
}
// array equals stub for large arrays.
@@ -11859,18 +12684,32 @@ class StubGenerator: public StubCodeGenerator {
if (UseMontgomeryMultiplyIntrinsic) {
StubId stub_id = StubId::stubgen_montgomeryMultiply_id;
- StubCodeMark mark(this, stub_id);
- MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
- StubRoutines::_montgomeryMultiply = g.generate_multiply();
+ address start = load_archive_data(stub_id);
+ if (start == nullptr) {
+ // we have to generate it
+ StubCodeMark mark(this, stub_id);
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
+ start = g.generate_multiply();
+ // record the stub start and end
+ store_archive_data(stub_id, start, _masm->pc());
+ }
+ StubRoutines::_montgomeryMultiply = start;
}
if (UseMontgomerySquareIntrinsic) {
StubId stub_id = StubId::stubgen_montgomerySquare_id;
- StubCodeMark mark(this, stub_id);
- MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
- // We use generate_multiply() rather than generate_square()
- // because it's faster for the sizes of modulus we care about.
- StubRoutines::_montgomerySquare = g.generate_multiply();
+ address start = load_archive_data(stub_id);
+ if (start == nullptr) {
+ // we have to generate it
+ StubCodeMark mark(this, stub_id);
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
+ // We use generate_multiply() rather than generate_square()
+ // because it's faster for the sizes of modulus we care about.
+ start = g.generate_multiply();
+ // record the stub start and end
+ store_archive_data(stub_id, start, _masm->pc());
+ }
+ StubRoutines::_montgomerySquare = start;
}
#endif // COMPILER2
@@ -11915,7 +12754,8 @@ class StubGenerator: public StubCodeGenerator {
}
if (UseGHASHIntrinsics) {
// StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
- StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks_wide();
+ StubRoutines::aarch64::_ghash_processBlocks_small = generate_ghash_processBlocks_small();
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(StubRoutines::aarch64::_ghash_processBlocks_small);
}
if (UseAESIntrinsics && UseGHASHIntrinsics) {
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
@@ -11937,16 +12777,13 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(StubId::stubgen_sha512_implCompress_id);
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(StubId::stubgen_sha512_implCompressMB_id);
}
- if (UseSHA3Intrinsics) {
-
+ if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) {
StubRoutines::_double_keccak = generate_double_keccak();
- if (UseSIMDForSHA3Intrinsic) {
- StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubId::stubgen_sha3_implCompress_id);
- StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id);
- } else {
- StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompress_id);
- StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompressMB_id);
- }
+ StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubId::stubgen_sha3_implCompress_id);
+ StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id);
+ } else if (UseSHA3Intrinsics) {
+ StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompress_id);
+ StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompressMB_id);
}
if (UsePoly1305Intrinsics) {
@@ -11962,7 +12799,7 @@ class StubGenerator: public StubCodeGenerator {
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -11984,12 +12821,35 @@ class StubGenerator: public StubCodeGenerator {
break;
};
}
+
+#if INCLUDE_CDS
+ static void init_AOTAddressTable(GrowableArray<address>& external_addresses) {
+ // external data defined in this file
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(_sha256_round_consts);
+ ADD(_sha512_round_consts);
+ ADD(_sha3_round_consts);
+ ADD(_double_keccak_round_consts);
+ ADD(_encodeBlock_toBase64);
+ ADD(_encodeBlock_toBase64URL);
+ ADD(_decodeBlock_fromBase64ForNoSIMD);
+ ADD(_decodeBlock_fromBase64URLForNoSIMD);
+ ADD(_decodeBlock_fromBase64ForSIMD);
+ ADD(_decodeBlock_fromBase64URLForSIMD);
+#undef ADD
+ }
+#endif // INCLUDE_CDS
}; // end class declaration
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
+#if INCLUDE_CDS
+void StubGenerator_init_AOTAddressTable(GrowableArray<address>& addresses) {
+ StubGenerator::init_AOTAddressTable(addresses);
+}
+#endif // INCLUDE_CDS
#if defined (LINUX)
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
index 88993818b47..f02b681ca10 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
@@ -41,8 +41,12 @@ static void empty_spin_wait() { }
#define DEFINE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) = CAST_FROM_FN_PTR(address, init_function);
-STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT)
+#define DEFINE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) [count];
+STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT, DEFINE_ARCH_ENTRY_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_ARRAY
#undef DEFINE_ARCH_ENTRY_INIT
#undef DEFINE_ARCH_ENTRY
@@ -413,3 +417,36 @@ ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::aarch64::_pio2[] = {
2.73370053816464559624e-44, // 0x36E3822280000000
2.16741683877804819444e-51, // 0x3569F31D00000000
};
+
+#if INCLUDE_CDS
+extern void StubGenerator_init_AOTAddressTable(GrowableArray<address>& addresses);
+
+void StubRoutines::init_AOTAddressTable() {
+ ResourceMark rm;
+ GrowableArray<address> external_addresses;
+ // publish static addresses referred to by aarch64 generator
+ // n.b. we have to use use an extern call here because class
+ // StubGenerator, which provides the static method that knows how to
+ // add the relevant addresses, is declared in a source file rather
+ // than in a separately includeable header.
+ StubGenerator_init_AOTAddressTable(external_addresses);
+ // publish external data addresses defined in nested aarch64 class
+ StubRoutines::aarch64::init_AOTAddressTable(external_addresses);
+ AOTCodeCache::publish_external_addresses(external_addresses);
+}
+
+void StubRoutines::aarch64::init_AOTAddressTable(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(_kyberConsts);
+ ADD(_dilithiumConsts);
+ // this is added in generic code
+ // ADD(_crc_table);
+ ADD(_adler_table);
+ ADD(_npio2_hw);
+ ADD(_dsin_coef);
+ ADD(_dcos_coef);
+ ADD(_two_over_pi);
+ ADD(_pio2);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
index c35371e1083..6067408ef13 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
@@ -60,9 +60,13 @@ class aarch64 {
#define DECLARE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DECLARE_ARCH_ENTRY(arch, blob_name, stub_name, field_name, getter_name)
-private:
- STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT)
+#define DECLARE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address STUB_FIELD_NAME(field_name) [count];
+private:
+ STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT, DECLARE_ARCH_ENTRY_ARRAY)
+
+#undef DECLARE_ARCH_ENTRY_ARRAY
#undef DECLARE_ARCH_ENTRY_INIT
#undef DECLARE_ARCH_ENTRY
@@ -78,8 +82,15 @@ private:
#define DEFINE_ARCH_ENTRY_GETTER_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DEFINE_ARCH_ENTRY_GETTER(arch, blob_name, stub_name, field_name, getter_name)
- STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT)
+#define DEFINE_ARCH_ENTRY_GETTER_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address getter_name(int idx) { \
+ assert(0 <= idx && idx < count, "entry array index out of range"); \
+ return STUB_FIELD_NAME(field_name) [idx]; \
+ }
+ STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT, DEFINE_ARCH_ENTRY_GETTER_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_GETTER_ARRAY
#undef DEFINE_ARCH_ENTRY_GETTER_INIT
#undef DEFINE_ARCH_ENTRY_GETTER
@@ -110,6 +121,11 @@ private:
_completed = true;
}
+#if INCLUDE_CDS
+ static void init_AOTAddressTable(GrowableArray<address>& external_addresses);
+#endif // INCLUDE_CDS
+
+
private:
static uint16_t _kyberConsts[];
static uint32_t _dilithiumConsts[];
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index 07b469650f0..69769fb8441 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -96,10 +96,6 @@ static inline Address aaddress(Register r) {
return iaddress(r);
}
-static inline Address at_rsp() {
- return Address(esp, 0);
-}
-
// At top of Java expression stack which may be different than esp(). It
// isn't for category 1 objects.
static inline Address at_tos () {
@@ -3370,7 +3366,7 @@ void TemplateTable::invokevirtual_helper(Register index,
__ load_klass(r0, recv);
// profile this call
- __ profile_virtual_call(r0, rlocals, r3);
+ __ profile_virtual_call(r0, rlocals);
// get target Method & entry point
__ lookup_virtual_method(r0, index, method);
@@ -3500,7 +3496,7 @@ void TemplateTable::invokeinterface(int byte_no) {
/*return_method=*/false);
// profile this call
- __ profile_virtual_call(r3, r13, r19);
+ __ profile_virtual_call(r3, r13);
// Get declaring interface class from method, and itable index
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 659c231464a..441bd4859fe 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
- * Copyright 2025 Arm Limited and/or its affiliates.
+ * Copyright 2025, 2026 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
*
*/
+#include "logging/log.hpp"
#include "pauth_aarch64.hpp"
#include "register_aarch64.hpp"
#include "runtime/arguments.hpp"
@@ -52,17 +53,56 @@ uintptr_t VM_Version::_pac_mask;
SpinWait VM_Version::_spin_wait;
+bool VM_Version::_cache_dic_enabled;
+bool VM_Version::_cache_idc_enabled;
+bool VM_Version::_ic_ivau_trapped;
+
const char* VM_Version::_features_names[MAX_CPU_FEATURES] = { nullptr };
static SpinWait get_spin_wait_desc() {
- SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount);
+ SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount, OnSpinWaitDelay);
if (spin_wait.inst() == SpinWait::SB && !VM_Version::supports_sb()) {
vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction");
}
+ if (spin_wait.inst() == SpinWait::WFET) {
+ if (!VM_Version::supports_wfxt()) {
+ vm_exit_during_initialization("OnSpinWaitInst is WFET but the CPU does not support the WFET instruction");
+ }
+
+ if (!VM_Version::supports_ecv()) {
+ vm_exit_during_initialization("The CPU does not support the FEAT_ECV required by the -XX:OnSpinWaitInst=wfet implementation");
+ }
+
+ if (!VM_Version::supports_sb()) {
+ vm_exit_during_initialization("The CPU does not support the SB instruction required by the -XX:OnSpinWaitInst=wfet implementation");
+ }
+
+ if (OnSpinWaitInstCount != 1) {
+ vm_exit_during_initialization("OnSpinWaitInstCount for OnSpinWaitInst 'wfet' must be 1");
+ }
+ } else {
+ if (!FLAG_IS_DEFAULT(OnSpinWaitDelay)) {
+ vm_exit_during_initialization("OnSpinWaitDelay can only be used with -XX:OnSpinWaitInst=wfet");
+ }
+ }
+
return spin_wait;
}
+static bool has_neoverse_n1_errata_1542419() {
+ const int major_rev_num = VM_Version::cpu_variant();
+ const int minor_rev_num = VM_Version::cpu_revision();
+ // Neoverse N1: 0xd0c
+ // Erratum 1542419 affects r3p0, r3p1 and r4p0.
+ // It is fixed in r4p1 and later revisions, which are not affected.
+ return (VM_Version::cpu_family() == VM_Version::CPU_ARM &&
+ VM_Version::model_is(0xd0c) &&
+ ((major_rev_num == 3 && minor_rev_num == 0) ||
+ (major_rev_num == 3 && minor_rev_num == 1) ||
+ (major_rev_num == 4 && minor_rev_num == 0)));
+}
+
void VM_Version::initialize() {
#define SET_CPU_FEATURE_NAME(id, name, bit) \
_features_names[bit] = XSTR(name);
@@ -74,9 +114,14 @@ void VM_Version::initialize() {
_supports_atomic_getset8 = true;
_supports_atomic_getadd8 = true;
- get_os_cpu_info();
+ _cache_dic_enabled = false;
+ _cache_idc_enabled = false;
+ _ic_ivau_trapped = false;
- int dcache_line = VM_Version::dcache_line_size();
+ get_os_cpu_info();
+ _cpu_features = _features;
+
+ int dcache_line = dcache_line_size();
// Limit AllocatePrefetchDistance so that it does not exceed the
// static constraint of 512 defined in runtime/globals.hpp.
@@ -124,7 +169,7 @@ void VM_Version::initialize() {
// if dcpop is available publish data cache line flush size via
// generic field, otherwise let if default to zero thereby
// disabling writeback
- if (VM_Version::supports_dcpop()) {
+ if (supports_dcpop()) {
_data_cache_line_flush_size = dcache_line;
}
}
@@ -201,16 +246,14 @@ void VM_Version::initialize() {
}
}
- // Cortex A53
- if (_cpu == CPU_ARM && model_is(0xd03)) {
+ if (_cpu == CPU_ARM && model_is(CPU_MODEL_ARM_CORTEX_A53)) {
set_feature(CPU_A53MAC);
if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) {
FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false);
}
}
- // Cortex A73
- if (_cpu == CPU_ARM && model_is(0xd09)) {
+ if (_cpu == CPU_ARM && model_is(CPU_MODEL_ARM_CORTEX_A73)) {
if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) {
FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, -1);
}
@@ -220,16 +263,11 @@ void VM_Version::initialize() {
}
}
- // Neoverse
- // N1: 0xd0c
- // N2: 0xd49
- // N3: 0xd8e
- // V1: 0xd40
- // V2: 0xd4f
- // V3: 0xd84
- if (_cpu == CPU_ARM && (model_is(0xd0c) || model_is(0xd49) ||
- model_is(0xd40) || model_is(0xd4f) ||
- model_is(0xd8e) || model_is(0xd84))) {
+ if (_cpu == CPU_ARM &&
+ model_is_in({ CPU_MODEL_ARM_NEOVERSE_N1, CPU_MODEL_ARM_NEOVERSE_V1,
+ CPU_MODEL_ARM_NEOVERSE_N2, CPU_MODEL_ARM_NEOVERSE_V2,
+ CPU_MODEL_ARM_NEOVERSE_N3, CPU_MODEL_ARM_NEOVERSE_V3,
+ CPU_MODEL_ARM_NEOVERSE_V3AE })) {
if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true);
}
@@ -252,21 +290,28 @@ void VM_Version::initialize() {
}
}
- if (FLAG_IS_DEFAULT(UseCRC32)) {
- UseCRC32 = VM_Version::supports_crc32();
+ if (supports_sha1() || supports_sha256() ||
+ supports_sha3() || supports_sha512()) {
+ if (FLAG_IS_DEFAULT(UseSHA)) {
+ FLAG_SET_DEFAULT(UseSHA, true);
+ } else if (!UseSHA) {
+ clear_feature(CPU_SHA1);
+ clear_feature(CPU_SHA2);
+ clear_feature(CPU_SHA3);
+ clear_feature(CPU_SHA512);
+ }
+ } else if (UseSHA) {
+ warning("SHA instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseSHA, false);
}
- if (UseCRC32 && !VM_Version::supports_crc32()) {
- warning("UseCRC32 specified, but not supported on this CPU");
- FLAG_SET_DEFAULT(UseCRC32, false);
- }
+ CHECK_CPU_FEATURE(supports_crc32, CRC32);
+ CHECK_CPU_FEATURE(supports_lse, LSE);
+ CHECK_CPU_FEATURE(supports_aes, AES);
- // Neoverse
- // V1: 0xd40
- // V2: 0xd4f
- // V3: 0xd84
if (_cpu == CPU_ARM &&
- (model_is(0xd40) || model_is(0xd4f) || model_is(0xd84))) {
+ model_is_in({ CPU_MODEL_ARM_NEOVERSE_V1, CPU_MODEL_ARM_NEOVERSE_V2,
+ CPU_MODEL_ARM_NEOVERSE_V3, CPU_MODEL_ARM_NEOVERSE_V3AE })) {
if (FLAG_IS_DEFAULT(UseCryptoPmullForCRC32)) {
FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, true);
}
@@ -275,7 +320,7 @@ void VM_Version::initialize() {
}
}
- if (UseCryptoPmullForCRC32 && (!VM_Version::supports_pmull() || !VM_Version::supports_sha3() || !VM_Version::supports_crc32())) {
+ if (UseCryptoPmullForCRC32 && (!supports_pmull() || !supports_sha3() || !supports_crc32())) {
warning("UseCryptoPmullForCRC32 specified, but not supported on this CPU");
FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, false);
}
@@ -289,48 +334,40 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
- if (VM_Version::supports_lse()) {
- if (FLAG_IS_DEFAULT(UseLSE))
- FLAG_SET_DEFAULT(UseLSE, true);
- } else {
- if (UseLSE) {
- warning("UseLSE specified, but not supported on this CPU");
- FLAG_SET_DEFAULT(UseLSE, false);
- }
- }
-
- if (VM_Version::supports_aes()) {
- UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
- UseAESIntrinsics =
- UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
- if (UseAESIntrinsics && !UseAES) {
- warning("UseAESIntrinsics enabled, but UseAES not, enabling");
- UseAES = true;
+ if (supports_aes()) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
}
if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
}
} else {
- if (UseAES) {
- warning("AES instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseAES, false);
- }
- if (UseAESIntrinsics) {
- warning("AES intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseAESIntrinsics, false);
- }
- if (UseAESCTRIntrinsics) {
- warning("AES/CTR intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ if (!UseAES) {
+ if (UseAESIntrinsics) {
+ warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+ } else if (!cpu_supports_aes()) {
+ if (UseAESIntrinsics) {
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
}
}
-
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
UseCRC32Intrinsics = true;
}
- if (VM_Version::supports_crc32()) {
+ if (supports_crc32()) {
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
}
@@ -347,17 +384,7 @@ void VM_Version::initialize() {
UseMD5Intrinsics = true;
}
- if (VM_Version::supports_sha1() || VM_Version::supports_sha256() ||
- VM_Version::supports_sha3() || VM_Version::supports_sha512()) {
- if (FLAG_IS_DEFAULT(UseSHA)) {
- FLAG_SET_DEFAULT(UseSHA, true);
- }
- } else if (UseSHA) {
- warning("SHA instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
- if (UseSHA && VM_Version::supports_sha1()) {
+ if (UseSHA && supports_sha1()) {
if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
}
@@ -366,7 +393,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
- if (UseSHA && VM_Version::supports_sha256()) {
+ if (UseSHA && supports_sha256()) {
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
@@ -375,21 +402,33 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
- if (UseSHA && VM_Version::supports_sha3()) {
- // Auto-enable UseSHA3Intrinsics on hardware with performance benefit.
- // Note that the evaluation of UseSHA3Intrinsics shows better performance
+ if (UseSHA) {
+ // No need to check supports_sha3(), since a fallback GPR intrinsic implementation is provided.
+ if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
+ }
+ } else if (UseSHA3Intrinsics) {
+ // Matches the documented and tested behavior: the -UseSHA option disables all SHA intrinsics.
+ warning("UseSHA3Intrinsics requires that UseSHA is enabled.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
+ if (UseSHA3Intrinsics && supports_sha3()) {
+ // Auto-enable UseSIMDForSHA3Intrinsic on hardware with performance benefit.
+ // Note that the evaluation of SHA3 extension Intrinsics shows better performance
// on Apple and Qualcomm silicon but worse performance on Neoverse V1 and N2.
if (_cpu == CPU_APPLE || _cpu == CPU_QUALCOMM) { // Apple or Qualcomm silicon
- if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
+ if (FLAG_IS_DEFAULT(UseSIMDForSHA3Intrinsic)) {
+ FLAG_SET_DEFAULT(UseSIMDForSHA3Intrinsic, true);
}
}
- } else if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) {
+ }
+ if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic && !supports_sha3()) {
warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
- if (UseSHA && VM_Version::supports_sha512()) {
+ if (UseSHA && supports_sha512()) {
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
@@ -398,11 +437,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
- if (VM_Version::supports_pmull()) {
+ if (supports_pmull()) {
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
FLAG_SET_DEFAULT(UseGHASHIntrinsics, true);
}
@@ -453,18 +488,20 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseBlockZeroing, true);
}
if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) {
- FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * VM_Version::zva_length());
+ FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * zva_length());
}
} else if (UseBlockZeroing) {
- warning("DC ZVA is not available on this CPU");
+ if (!FLAG_IS_DEFAULT(UseBlockZeroing)) {
+ warning("DC ZVA is not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseBlockZeroing, false);
}
- if (VM_Version::supports_sve2()) {
+ if (supports_sve2()) {
if (FLAG_IS_DEFAULT(UseSVE)) {
FLAG_SET_DEFAULT(UseSVE, 2);
}
- } else if (VM_Version::supports_sve()) {
+ } else if (supports_sve()) {
if (FLAG_IS_DEFAULT(UseSVE)) {
FLAG_SET_DEFAULT(UseSVE, 1);
} else if (UseSVE > 1) {
@@ -515,7 +552,7 @@ void VM_Version::initialize() {
// 1) this code has been built with branch-protection and
// 2) the CPU/OS supports it
#ifdef __ARM_FEATURE_PAC_DEFAULT
- if (!VM_Version::supports_paca()) {
+ if (!supports_paca()) {
// Disable PAC to prevent illegal instruction crashes.
warning("ROP-protection specified, but not supported on this CPU. Disabling ROP-protection.");
} else {
@@ -632,6 +669,22 @@ void VM_Version::initialize() {
check_virtualizations();
+#ifdef __APPLE__
+ DefaultWXWriteMode = UseOldWX ? WXWrite : WXArmedForWrite;
+
+ if (TraceWXHealing) {
+ if (pthread_jit_write_protect_supported_np()) {
+ tty->print_cr("### TraceWXHealing is in use");
+ if (StressWXHealing) {
+ tty->print_cr("### StressWXHealing is in use");
+ }
+ } else {
+ tty->print_cr("WX Healing is not in use because MAP_JIT write protection "
+ "does not work on this system.");
+ }
+ }
+#endif
+
// Sync SVE related CPU features with flags
if (UseSVE < 2) {
clear_feature(CPU_SVE2);
@@ -641,6 +694,43 @@ void VM_Version::initialize() {
clear_feature(CPU_SVE);
}
+ if (FLAG_IS_DEFAULT(UseSingleICacheInvalidation) && is_cache_idc_enabled() && is_cache_dic_enabled()) {
+ FLAG_SET_DEFAULT(UseSingleICacheInvalidation, true);
+ }
+
+ if (FLAG_IS_DEFAULT(NeoverseN1ICacheErratumMitigation) && has_neoverse_n1_errata_1542419()
+ && is_cache_idc_enabled() && !is_cache_dic_enabled()) {
+ if (_ic_ivau_trapped) {
+ FLAG_SET_DEFAULT(NeoverseN1ICacheErratumMitigation, true);
+ } else {
+ log_info(os)("IC IVAU is not trapped; disabling NeoverseN1ICacheErratumMitigation");
+ FLAG_SET_DEFAULT(NeoverseN1ICacheErratumMitigation, false);
+ }
+ }
+
+ if (NeoverseN1ICacheErratumMitigation) {
+ if (!has_neoverse_n1_errata_1542419()) {
+ vm_exit_during_initialization("NeoverseN1ICacheErratumMitigation is set for the CPU not having Neoverse N1 errata 1542419");
+ }
+ // If the user explicitly set the flag, verify the trap is active.
+ if (!FLAG_IS_DEFAULT(NeoverseN1ICacheErratumMitigation) && !_ic_ivau_trapped) {
+ vm_exit_during_initialization("NeoverseN1ICacheErratumMitigation is set but IC IVAU is not trapped. "
+ "The optimization is not safe on this system.");
+ }
+ if (FLAG_IS_DEFAULT(UseSingleICacheInvalidation)) {
+ FLAG_SET_DEFAULT(UseSingleICacheInvalidation, true);
+ }
+
+ if (!UseSingleICacheInvalidation) {
+ vm_exit_during_initialization("NeoverseN1ICacheErratumMitigation is set but UseSingleICacheInvalidation is not enabled");
+ }
+ }
+
+ if (UseSingleICacheInvalidation
+ && (!is_cache_idc_enabled() || (!is_cache_dic_enabled() && !NeoverseN1ICacheErratumMitigation))) {
+ vm_exit_during_initialization("UseSingleICacheInvalidation is set but neither IDC nor DIC nor NeoverseN1ICacheErratumMitigation is enabled");
+ }
+
// Construct the "features" string
stringStream ss(512);
ss.print("0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision);
@@ -658,16 +748,52 @@ void VM_Version::initialize() {
void VM_Version::insert_features_names(uint64_t features, stringStream& ss) {
int i = 0;
ss.join([&]() {
- while (i < MAX_CPU_FEATURES) {
- if (supports_feature((VM_Version::Feature_Flag)i)) {
- return _features_names[i++];
+ const char* str = nullptr;
+ while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
+ if (supports_feature(features, (VM_Version::Feature_Flag)i)) {
+ str = _features_names[i];
}
i += 1;
}
- return (const char*)nullptr;
+ return str;
}, ", ");
}
+void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
+ uint64_t features = *(uint64_t*)features_buffer;
+ insert_features_names(features, ss);
+}
+
+void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
+ uint64_t vm_features_set1 = *(uint64_t*)features_set1;
+ uint64_t vm_features_set2 = *(uint64_t*)features_set2;
+ int i = 0;
+ ss.join([&]() {
+ const char* str = nullptr;
+ while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
+ Feature_Flag flag = (Feature_Flag)i;
+ if (supports_feature(vm_features_set1, flag) && !supports_feature(vm_features_set2, flag)) {
+ str = _features_names[i];
+ }
+ i += 1;
+ }
+ return str;
+ }, ", ");
+}
+
+int VM_Version::cpu_features_size() {
+ return sizeof(_features);
+}
+
+void VM_Version::store_cpu_features(void* buf) {
+ *(uint64_t*)buf = _features;
+}
+
+bool VM_Version::supports_features(void* features_buffer) {
+ uint64_t features_to_test = *(uint64_t*)features_buffer;
+ return (_features & features_to_test) == features_to_test;
+}
+
#if defined(LINUX)
static bool check_info_file(const char* fpath,
const char* virt1, VirtualizationType vt1,
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
index 17087d243d3..30f1a5d86ca 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -30,6 +30,8 @@
#include "runtime/abstract_vm_version.hpp"
#include "utilities/sizes.hpp"
+#include <initializer_list>
+
class stringStream;
#define BIT_MASK(flag) (1ULL<<(flag))
@@ -53,6 +55,15 @@ protected:
static int _max_supported_sve_vector_length;
static bool _rop_protection;
static uintptr_t _pac_mask;
+ // When _prefer_sve_merging_mode_cpy is true, `cpy (imm, zeroing)` is
+ // implemented as `movi; cpy(imm, merging)`.
+ static constexpr bool _prefer_sve_merging_mode_cpy = true;
+ static bool _cache_dic_enabled;
+ static bool _cache_idc_enabled;
+
+ // IC IVAU trap probe for Neoverse N1 erratum 1542419.
+ // Set by get_os_cpu_info() on Linux via ic_ivau_probe_linux_aarch64.S.
+ static bool _ic_ivau_trapped;
static SpinWait _spin_wait;
@@ -112,14 +123,26 @@ public:
CPU_APPLE = 'a',
};
-enum Ampere_CPU_Model {
+ enum Ampere_CPU_Model {
CPU_MODEL_EMAG = 0x0, /* CPU implementer is CPU_AMCC */
CPU_MODEL_ALTRA = 0xd0c, /* CPU implementer is CPU_ARM, Neoverse N1 */
CPU_MODEL_ALTRAMAX = 0xd0c, /* CPU implementer is CPU_ARM, Neoverse N1 */
CPU_MODEL_AMPERE_1 = 0xac3, /* CPU implementer is CPU_AMPERE */
CPU_MODEL_AMPERE_1A = 0xac4, /* CPU implementer is CPU_AMPERE */
CPU_MODEL_AMPERE_1B = 0xac5 /* AMPERE_1B core Implements ARMv8.7 with CSSC, MTE, SM3/SM4 extensions */
-};
+ };
+
+ enum ARM_CPU_Model {
+ CPU_MODEL_ARM_CORTEX_A53 = 0xd03,
+ CPU_MODEL_ARM_CORTEX_A73 = 0xd09,
+ CPU_MODEL_ARM_NEOVERSE_N1 = 0xd0c,
+ CPU_MODEL_ARM_NEOVERSE_V1 = 0xd40,
+ CPU_MODEL_ARM_NEOVERSE_N2 = 0xd49,
+ CPU_MODEL_ARM_NEOVERSE_V2 = 0xd4f,
+ CPU_MODEL_ARM_NEOVERSE_V3AE = 0xd83,
+ CPU_MODEL_ARM_NEOVERSE_V3 = 0xd84,
+ CPU_MODEL_ARM_NEOVERSE_N3 = 0xd8e,
+ };
#define CPU_FEATURE_FLAGS(decl) \
decl(FP, fp, 0) \
@@ -142,7 +165,9 @@ enum Ampere_CPU_Model {
/* flags above must follow Linux HWCAP */ \
decl(SVEBITPERM, svebitperm, 27) \
decl(SVE2, sve2, 28) \
- decl(A53MAC, a53mac, 31)
+ decl(A53MAC, a53mac, 31) \
+ decl(ECV, ecv, 32) \
+ decl(WFXT, wfxt, 33)
enum Feature_Flag {
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = bit,
@@ -170,6 +195,11 @@ enum Ampere_CPU_Model {
static bool supports_feature(Feature_Flag flag) {
return (_features & BIT_MASK(flag)) != 0;
}
+ static bool supports_feature(uint64_t features, Feature_Flag flag) {
+ return (features & BIT_MASK(flag)) != 0;
+ }
+
+ static bool cpu_supports_aes() { return supports_feature(_cpu_features, CPU_AES); }
static int cpu_family() { return _cpu; }
static int cpu_model() { return _model; }
@@ -181,7 +211,16 @@ enum Ampere_CPU_Model {
return _model == cpu_model || _model2 == cpu_model;
}
- static bool is_zva_enabled() { return 0 <= _zva_length; }
+ static bool model_is_in(std::initializer_list<int> cpu_models) {
+ for (const int& cpu_model : cpu_models) {
+ if (_model == cpu_model || _model2 == cpu_model) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static bool is_zva_enabled() { return 0 < _zva_length; }
static int zva_length() {
assert(is_zva_enabled(), "ZVA not available");
return _zva_length;
@@ -216,11 +255,31 @@ enum Ampere_CPU_Model {
static bool use_rop_protection() { return _rop_protection; }
+ static bool prefer_sve_merging_mode_cpy() { return _prefer_sve_merging_mode_cpy; }
+
// For common 64/128-bit unpredicated vector operations, we may prefer
// emitting NEON instructions rather than the corresponding SVE instructions.
static bool use_neon_for_vector(int vector_length_in_bytes) {
return vector_length_in_bytes <= 16;
}
+
+ static bool is_cache_dic_enabled() { return _cache_dic_enabled; }
+ static bool is_cache_idc_enabled() { return _cache_idc_enabled; }
+ static bool is_ic_ivau_trapped() { return _ic_ivau_trapped; }
+
+ static void get_cpu_features_name(void* features_buffer, stringStream& ss);
+
+ // Returns names of features present in features_set1 but not in features_set2
+ static void get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss);
+
+ // Returns number of bytes required to store cpu features representation
+ static int cpu_features_size();
+
+ // Stores cpu features representation in the provided buffer. This representation is arch dependent.
+ // Size of the buffer must be same as returned by cpu_features_size()
+ static void store_cpu_features(void* buf);
+
+ static bool supports_features(void* features_to_test);
};
#endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 606275d7666..60a0ef307b5 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -1088,10 +1088,8 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
return clone_base_plus_offset_address(m, mstack, address_visited);
}
-// Return whether or not this register is ever used as an argument. This
-// function is used on startup to build the trampoline stubs in generateOptoStub.
-// Registers not mentioned will be killed by the VM call in the trampoline, and
-// arguments in those registers not be available to the callee.
+#ifdef ASSERT
+// Return whether or not this register is ever used as an argument.
bool Matcher::can_be_java_arg( int reg ) {
if (reg == R_R0_num ||
reg == R_R1_num ||
@@ -1102,10 +1100,7 @@ bool Matcher::can_be_java_arg( int reg ) {
reg <= R_S13_num) return true;
return false;
}
-
-bool Matcher::is_spillable_arg( int reg ) {
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -1117,10 +1112,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? 30 : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
- return false;
-}
-
// Register for DIVI projection of divmodI
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
@@ -4445,6 +4436,18 @@ instruct membar_release_lock() %{
ins_pipe(empty);
%}
+instruct membar_storeload() %{
+ match(MemBarStoreLoad);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-storeload" %}
+ ins_encode %{
+ __ membar(MacroAssembler::StoreLoad, noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
instruct membar_volatile() %{
match(MemBarVolatile);
ins_cost(4*MEMORY_REF_COST);
@@ -4468,6 +4471,18 @@ instruct unnecessary_membar_volatile() %{
ins_pipe(empty);
%}
+instruct membar_full() %{
+ match(MemBarFull);
+ ins_cost(4*MEMORY_REF_COST);
+
+ size(4);
+ format %{ "MEMBAR-full" %}
+ ins_encode %{
+ __ membar(MacroAssembler::StoreLoad, noreg);
+ %}
+ ins_pipe(long_memory_op);
+%}
+
//----------Register Move Instructions-----------------------------------------
// Cast Index to Pointer for unsafe natives
diff --git a/src/hotspot/cpu/arm/c1_globals_arm.hpp b/src/hotspot/cpu/arm/c1_globals_arm.hpp
index 1fe5f1a23ee..9db999e81b3 100644
--- a/src/hotspot/cpu/arm/c1_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_globals_arm.hpp
@@ -43,7 +43,6 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1500 );
define_pd_global(intx, OnStackReplacePercentage, 933 );
-define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
@@ -53,7 +52,6 @@ define_pd_global(bool, ProfileInterpreter, false);
define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true);
define_pd_global(bool, CICompileOSR, true );
#endif // COMPILER2
define_pd_global(bool, UseTypeProfile, false);
diff --git a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp
index 0849bd594f0..34da47792ae 100644
--- a/src/hotspot/cpu/arm/c2_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp
@@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 4);
// C2 gets to use all the float/double registers
define_pd_global(intx, FreqInlineSize, 175);
define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment
-define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
// The default setting 16/16 seems to work best.
// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
@@ -94,7 +93,4 @@ define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
#endif // CPU_ARM_C2_GLOBALS_ARM_HPP
diff --git a/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
index 2427d46cafa..5d63035ac69 100644
--- a/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp
@@ -67,9 +67,7 @@ void CardTableBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet d
void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
BLOCK_COMMENT("CardTablePostBarrier");
- BarrierSet* bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
Label L_cardtable_loop, L_done;
@@ -83,7 +81,7 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
__ sub(count, count, addr); // nb of cards
// warning: Rthread has not been preserved
- __ mov_address(tmp, (address) ct->byte_map_base());
+ __ mov_address(tmp, (address)ctbs->card_table_base_const());
__ add(addr,tmp, addr);
Register zero = __ zero_register(tmp);
@@ -122,8 +120,7 @@ void CardTableBarrierSetAssembler::store_check_part1(MacroAssembler* masm, Regis
assert(bs->kind() == BarrierSet::CardTableBarrierSet,
"Wrong barrier set kind");
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
// Load card table base address.
@@ -140,7 +137,7 @@ void CardTableBarrierSetAssembler::store_check_part1(MacroAssembler* masm, Regis
Possible cause is a cache miss (card table base address resides in a
rarely accessed area of thread descriptor).
*/
- __ mov_address(card_table_base, (address)ct->byte_map_base());
+ __ mov_address(card_table_base, (address)ctbs->card_table_base_const());
}
// The 2nd part of the store check.
@@ -170,8 +167,8 @@ void CardTableBarrierSetAssembler::store_check_part2(MacroAssembler* masm, Regis
void CardTableBarrierSetAssembler::set_card(MacroAssembler* masm, Register card_table_base, Address card_table_addr, Register tmp) {
CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set());
- CardTable* ct = ctbs->card_table();
- if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
+
+ if ((((uintptr_t)ctbs->card_table_base_const() & 0xff) == 0)) {
// Card table is aligned so the lowest byte of the table address base is zero.
// This works only if the code is not saved for later use, possibly
// in a context where the base would no longer be aligned.
diff --git a/src/hotspot/cpu/arm/globals_arm.hpp b/src/hotspot/cpu/arm/globals_arm.hpp
index 363a9a2c25c..c568ea04122 100644
--- a/src/hotspot/cpu/arm/globals_arm.hpp
+++ b/src/hotspot/cpu/arm/globals_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@ define_pd_global(bool, TrapBasedNullChecks, false); // Not needed
define_pd_global(bool, DelayCompilerStubsGeneration, false); // No need - only few compiler's stubs
define_pd_global(size_t, CodeCacheSegmentSize, 64);
-define_pd_global(intx, CodeEntryAlignment, 16);
+define_pd_global(uint, CodeEntryAlignment, 16);
define_pd_global(intx, OptoLoopAlignment, 16);
#define DEFAULT_STACK_YELLOW_PAGES (2)
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 23ecea24eb2..aee407864ee 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1210,7 +1210,7 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
// Sets mdp, blows Rtemp.
-void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) {
+void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver) {
assert_different_registers(mdp, receiver, Rtemp);
if (ProfileInterpreter) {
@@ -1219,19 +1219,8 @@ void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register rece
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
- Label skip_receiver_profile;
- if (receiver_can_be_null) {
- Label not_null;
- cbnz(receiver, not_null);
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
- b(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
record_klass_in_profile(receiver, mdp, Rtemp, true);
- bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.hpp b/src/hotspot/cpu/arm/interp_masm_arm.hpp
index 530be1c577e..147cd252b2c 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.hpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -239,8 +239,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_call(Register mdp); // Sets mdp, blows Rtemp.
void profile_final_call(Register mdp); // Sets mdp, blows Rtemp.
- void profile_virtual_call(Register mdp, Register receiver, // Sets mdp, blows Rtemp.
- bool receiver_can_be_null = false);
+ void profile_virtual_call(Register mdp, Register receiver); // Sets mdp, blows Rtemp.
void profile_ret(Register mdp, Register return_bci); // Sets mdp, blows R0-R3/R0-R18, Rtemp, LR
void profile_null_seen(Register mdp); // Sets mdp.
void profile_typecheck(Register mdp, Register klass); // Sets mdp, blows Rtemp.
diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp
index 6c818e1f20d..7978a5b7090 100644
--- a/src/hotspot/cpu/arm/matcher_arm.hpp
+++ b/src/hotspot/cpu/arm/matcher_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -75,7 +75,6 @@
static bool narrow_klass_use_complex_address() {
NOT_LP64(ShouldNotCallThis());
- assert(UseCompressedClassPointers, "only for compressed klass code");
return false;
}
diff --git a/src/hotspot/cpu/arm/methodHandles_arm.cpp b/src/hotspot/cpu/arm/methodHandles_arm.cpp
index 3710fa33f36..2da14d8ffed 100644
--- a/src/hotspot/cpu/arm/methodHandles_arm.cpp
+++ b/src/hotspot/cpu/arm/methodHandles_arm.cpp
@@ -104,14 +104,13 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Registe
__ andr(temp, temp, (unsigned)java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
__ cmp(temp, ref_kind);
__ b(L, eq);
- { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
- jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+ const char* msg = ref_kind_to_verify_msg(ref_kind);
if (ref_kind == JVM_REF_invokeVirtual ||
- ref_kind == JVM_REF_invokeSpecial)
+ ref_kind == JVM_REF_invokeSpecial) {
// could do this for all ref_kinds, but would explode assembly code size
- trace_method_handle(_masm, buf);
- __ stop(buf);
+ trace_method_handle(_masm, msg);
}
+ __ stop(msg);
BLOCK_COMMENT("} verify_ref_kind");
__ bind(L);
}
diff --git a/src/hotspot/cpu/arm/stubDeclarations_arm.hpp b/src/hotspot/cpu/arm/stubDeclarations_arm.hpp
index 5f768a205a5..5fb0d4e901f 100644
--- a/src/hotspot/cpu/arm/stubDeclarations_arm.hpp
+++ b/src/hotspot/cpu/arm/stubDeclarations_arm.hpp
@@ -29,7 +29,8 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 500) \
do_stub(preuniverse, atomic_load_long) \
do_arch_entry(Arm, preuniverse, atomic_load_long, \
@@ -42,7 +43,8 @@
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 9000) \
do_stub(initial, idiv_irem) \
do_arch_entry(Arm, initial, idiv_irem, \
@@ -51,14 +53,16 @@
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 2000) \
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 22000) \
do_stub(compiler, partial_subtype_check) \
do_arch_entry(Arm, compiler, partial_subtype_check, \
@@ -68,7 +72,8 @@
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 22000) \
diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
index a36ad3a0c47..a705b15eff5 100644
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
@@ -3211,7 +3211,7 @@ class StubGenerator: public StubCodeGenerator {
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -3235,8 +3235,8 @@ class StubGenerator: public StubCodeGenerator {
}
}; // end class declaration
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
// implementation of internal development flag
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.cpp b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
index a4f2b5e1bd9..38a9b298562 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.cpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.cpp
@@ -32,10 +32,16 @@
#define DEFINE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) = CAST_FROM_FN_PTR(address, init_function);
-STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT)
+STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT, DEFINE_ARCH_ENTRY_ARRAY)
#undef DEFINE_ARCH_ENTRY_INIT
#undef DEFINE_ARCH_ENTRY
address StubRoutines::crc_table_addr() { ShouldNotCallThis(); return nullptr; }
address StubRoutines::crc32c_table_addr() { ShouldNotCallThis(); return nullptr; }
+
+#if INCLUDE_CDS
+// nothing to do for arm
+void StubRoutines::init_AOTAddressTable() {
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/arm/stubRoutines_arm.hpp b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
index 45ab10d14f9..29d96d0e653 100644
--- a/src/hotspot/cpu/arm/stubRoutines_arm.hpp
+++ b/src/hotspot/cpu/arm/stubRoutines_arm.hpp
@@ -55,9 +55,13 @@ class Arm {
#define DECLARE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DECLARE_ARCH_ENTRY(arch, blob_name, stub_name, field_name, getter_name)
-private:
- STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT)
+#define DECLARE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address STUB_FIELD_NAME(field_name) [count] ;
+private:
+ STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT, DECLARE_ARCH_ENTRY_ARRAY)
+
+#undef DECLARE_ARCH_ENTRY_ARRAY
#undef DECLARE_ARCH_ENTRY_INIT
#undef DECLARE_ARCH_ENTRY
@@ -71,8 +75,12 @@ public:
#define DEFINE_ARCH_ENTRY_GETTER_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DEFINE_ARCH_ENTRY_GETTER(arch, blob_name, stub_name, field_name, getter_name)
- STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT)
+#define DEFINE_ARCH_ENTRY_GETTER_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address getter_name(int idx) { return STUB_FIELD_NAME(field_name) [idx] ; }
+ STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT, DEFINE_ARCH_ENTRY_GETTER_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_GETTER_ARRAY
#undef DEFINE_ARCH_ENTRY_GETTER_INIT
#undef DEFINE_ARCH_ENTRY_GETTER
diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp
index 15e38411482..378e01fc1cc 100644
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -568,6 +568,9 @@ class Assembler : public AbstractAssembler {
XSCVDPHP_OPCODE= (60u << OPCODE_SHIFT | 347u << 2 | 17u << 16), // XX2-FORM
XXPERM_OPCODE = (60u << OPCODE_SHIFT | 26u << 3),
XXSEL_OPCODE = (60u << OPCODE_SHIFT | 3u << 4),
+ XSCMPEQDP_OPCODE=(60u << OPCODE_SHIFT | 3u << 3),
+ XSCMPGEDP_OPCODE=(60u << OPCODE_SHIFT | 19u << 3),
+ XSCMPGTDP_OPCODE=(60u << OPCODE_SHIFT | 11u << 3),
XXSPLTIB_OPCODE= (60u << OPCODE_SHIFT | 360u << 1),
XVDIVDP_OPCODE = (60u << OPCODE_SHIFT | 120u << 3),
XVABSSP_OPCODE = (60u << OPCODE_SHIFT | 409u << 2),
@@ -596,6 +599,9 @@ class Assembler : public AbstractAssembler {
XVMAXSP_OPCODE = (60u << OPCODE_SHIFT | 192u << 3),
XVMAXDP_OPCODE = (60u << OPCODE_SHIFT | 224u << 3),
+ XSMINJDP_OPCODE = (60u << OPCODE_SHIFT | 152u << 3),
+ XSMAXJDP_OPCODE = (60u << OPCODE_SHIFT | 144u << 3),
+
// Deliver A Random Number (introduced with POWER9)
DARN_OPCODE = (31u << OPCODE_SHIFT | 755u << 1),
@@ -1574,10 +1580,6 @@ class Assembler : public AbstractAssembler {
static bool is_nop(int x) {
return x == 0x60000000;
}
- // endgroup opcode for Power6
- static bool is_endgroup(int x) {
- return is_ori(x) && inv_ra_field(x) == 1 && inv_rs_field(x) == 1 && inv_d1_field(x) == 0;
- }
private:
@@ -1653,9 +1655,6 @@ class Assembler : public AbstractAssembler {
inline void ori_opt( Register d, int ui16);
inline void oris_opt(Register d, int ui16);
- // endgroup opcode for Power6
- inline void endgroup();
-
// count instructions
inline void cntlzw( Register a, Register s);
inline void cntlzw_( Register a, Register s);
@@ -2424,6 +2423,9 @@ class Assembler : public AbstractAssembler {
inline void xscvdphp( VectorSRegister d, VectorSRegister b);
inline void xxland( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxsel( VectorSRegister d, VectorSRegister a, VectorSRegister b, VectorSRegister c);
+ inline void xscmpeqdp(VectorSRegister t, VectorSRegister a, VectorSRegister b); // Requires Power9
+ inline void xscmpgedp(VectorSRegister t, VectorSRegister a, VectorSRegister b); // Requires Power9
+ inline void xscmpgtdp(VectorSRegister t, VectorSRegister a, VectorSRegister b); // Requires Power9
inline void xxspltib( VectorSRegister d, int ui8);
inline void xvdivsp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xvdivdp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
@@ -2449,6 +2451,9 @@ class Assembler : public AbstractAssembler {
inline void xvrdpim( VectorSRegister d, VectorSRegister b);
inline void xvrdpip( VectorSRegister d, VectorSRegister b);
+ inline void xsminjdp( VectorSRegister d, VectorSRegister a, VectorSRegister b); // Requires Power 9
+ inline void xsmaxjdp( VectorSRegister d, VectorSRegister a, VectorSRegister b); // Requires Power 9
+
// The following functions do not match exactly the Java.math semantics.
inline void xvminsp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xvmindp( VectorSRegister d, VectorSRegister a, VectorSRegister b);
diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
index 7e49ec7455d..d349bbc6f87 100644
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -253,8 +253,6 @@ inline void Assembler::mr( Register d, Register s) { Assembler::orr(d, s,
inline void Assembler::ori_opt( Register d, int ui16) { if (ui16!=0) Assembler::ori( d, d, ui16); }
inline void Assembler::oris_opt(Register d, int ui16) { if (ui16!=0) Assembler::oris(d, d, ui16); }
-inline void Assembler::endgroup() { Assembler::ori(R1, R1, 0); }
-
// count instructions
inline void Assembler::cntlzw( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(0)); }
inline void Assembler::cntlzw_( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); }
@@ -908,6 +906,9 @@ inline void Assembler::xvrdpic( VectorSRegister d, VectorSRegister b)
inline void Assembler::xvrdpim( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIM_OPCODE | vsrt(d) | vsrb(b)); }
inline void Assembler::xvrdpip( VectorSRegister d, VectorSRegister b) { emit_int32( XVRDPIP_OPCODE | vsrt(d) | vsrb(b)); }
+inline void Assembler::xsminjdp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XSMINJDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xsmaxjdp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XSMAXJDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+
inline void Assembler::xvminsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xvmindp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMINDP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xvmaxsp(VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XVMAXSP_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
@@ -923,6 +924,10 @@ inline void Assembler::xxmrghw( VectorSRegister d, VectorSRegister a, VectorSReg
inline void Assembler::xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXMRGHW_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xxsel( VectorSRegister d, VectorSRegister a, VectorSRegister b, VectorSRegister c) { emit_int32( XXSEL_OPCODE | vsrt(d) | vsra(a) | vsrb(b) | vsrc(c)); }
+inline void Assembler::xscmpeqdp(VectorSRegister t, VectorSRegister a, VectorSRegister b) { emit_int32( XSCMPEQDP_OPCODE | vsrt(t) | vsra(a) | vsrb(b) );}
+inline void Assembler::xscmpgedp(VectorSRegister t, VectorSRegister a, VectorSRegister b) { emit_int32( XSCMPGEDP_OPCODE | vsrt(t) | vsra(a) | vsrb(b) );}
+inline void Assembler::xscmpgtdp(VectorSRegister t, VectorSRegister a, VectorSRegister b) { emit_int32( XSCMPGTDP_OPCODE | vsrt(t) | vsra(a) | vsrb(b) );}
+
// VSX Extended Mnemonics
inline void Assembler::xxspltd( VectorSRegister d, VectorSRegister a, int x) { xxpermdi(d, a, a, x ? 3 : 0); }
inline void Assembler::xxmrghd( VectorSRegister d, VectorSRegister a, VectorSRegister b) { xxpermdi(d, a, b, 0); }
diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index 798451446e5..4d7af0e4a71 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -144,7 +144,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
if (len->is_valid()) {
stw(len, arrayOopDesc::length_offset_in_bytes(), obj);
- } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
+ } else if (!UseCompactObjectHeaders) {
// Otherwise length is in the class gap.
store_klass_gap(obj);
}
diff --git a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
index 77d9acd1cd1..c6fe15aac07 100644
--- a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
@@ -51,8 +51,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true);
-define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
#endif // !COMPILER2
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
index edf348fdc50..8bbffc22c54 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -600,19 +601,21 @@ void C2_MacroAssembler::count_positives(Register src, Register cnt, Register res
orr(tmp0, tmp2, tmp0);
and_(tmp0, tmp0, tmp1);
- bne(CR0, Lslow); // Found negative byte.
+ bne(CR0, Lslow); // Found negative byte.
addi(result, result, 16);
bdnz(Lfastloop);
bind(Lslow); // Fallback to slow version.
subf(tmp0, src, result); // Bytes known positive.
- subf_(tmp0, tmp0, cnt); // Remaining Bytes.
+ clrldi(tmp1, cnt, 32); // Clear garbage from upper 32 bits.
+ subf_(tmp0, tmp0, tmp1); // Remaining Bytes.
beq(CR0, Ldone);
mtctr(tmp0);
+
bind(Lloop);
lbz(tmp0, 0, result);
andi_(tmp0, tmp0, 0x80);
- bne(CR0, Ldone); // Found negative byte.
+ bne(CR0, Ldone); // Found negative byte.
addi(result, result, 1);
bdnz(Lloop);
@@ -664,3 +667,37 @@ void C2_MacroAssembler::reduceI(int opcode, Register dst, Register iSrc, VectorR
fn_scalar_op(opcode, dst, iSrc, R0); // dst <- op(iSrc, R0)
}
+// Works for single and double precision floats.
+// dst = (op1 cmp(cc) op2) ? src1 : src2;
+// Unordered semantics are the same as for CmpF3Node/CmpD3Node which implement the fcmpl/dcmpl bytecodes.
+// Comparing unordered values has the same result as when src1 is less than src2.
+// So dst = src1 for <, <=, != and dst = src2 for >, >=, ==.
+void C2_MacroAssembler::cmovF(int cc, VectorSRegister dst, VectorSRegister op1, VectorSRegister op2,
+ VectorSRegister src1, VectorSRegister src2, VectorSRegister tmp) {
+ // See operand cmpOp() for details.
+ bool invert_cond = (cc & 8) == 0; // invert reflects bcondCRbiIs0
+ auto cmp = (Assembler::Condition)(cc & 3);
+
+ switch(cmp) {
+ case Assembler::Condition::equal:
+ // Use false_result if "unordered".
+ xscmpeqdp(tmp, op1, op2);
+ break;
+ case Assembler::Condition::greater:
+ // Use false_result if "unordered".
+ xscmpgtdp(tmp, op1, op2);
+ break;
+ case Assembler::Condition::less:
+ // Use true_result if "unordered".
+ xscmpgedp(tmp, op1, op2);
+ invert_cond = !invert_cond;
+ break;
+ default:
+ assert(false, "unsupported compare condition: %d", cc);
+ ShouldNotReachHere();
+ }
+
+ VectorSRegister true_result = invert_cond ? src2 : src1;
+ VectorSRegister false_result = invert_cond ? src1 : src2;
+ xxsel(dst, false_result, true_result, tmp);
+}
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
index 5a114294c1f..e0dffec8396 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
@@ -74,5 +74,7 @@
void count_positives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
void reduceI(int opcode, Register dst, Register iSrc, VectorRegister vSrc, VectorRegister vTmp1, VectorRegister vTmp2);
+ void cmovF(int cc, VectorSRegister dst, VectorSRegister op1, VectorSRegister op2,
+ VectorSRegister src1, VectorSRegister src2, VectorSRegister tmp);
#endif // CPU_PPC_C2_MACROASSEMBLER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
index d5a0ff10994..e4942fa1850 100644
--- a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2019 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -44,10 +44,9 @@ define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
-define_pd_global(intx, FreqInlineSize, 175);
+define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, InteriorEntryAlignment, 16);
-define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
@@ -91,7 +90,4 @@ define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, true);
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
#endif // CPU_PPC_C2_GLOBALS_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/disassembler_ppc.cpp b/src/hotspot/cpu/ppc/disassembler_ppc.cpp
index fb3cb50cdec..2e16e1a301f 100644
--- a/src/hotspot/cpu/ppc/disassembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/disassembler_ppc.cpp
@@ -119,9 +119,6 @@ address Disassembler::decode_instruction0(address here, outputStream * st, addre
} else if (instruction == 0xbadbabe) {
st->print(".data 0xbadbabe");
next = here + Assembler::instr_len(here);
- } else if (Assembler::is_endgroup(instruction)) {
- st->print("endgroup");
- next = here + Assembler::instr_len(here);
} else {
next = here;
}
diff --git a/src/hotspot/cpu/ppc/downcallLinker_ppc.cpp b/src/hotspot/cpu/ppc/downcallLinker_ppc.cpp
index f12d25ac611..d149fc33ac3 100644
--- a/src/hotspot/cpu/ppc/downcallLinker_ppc.cpp
+++ b/src/hotspot/cpu/ppc/downcallLinker_ppc.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2020, 2025 SAP SE. All rights reserved.
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -135,10 +135,10 @@ void DowncallLinker::StubGenerator::generate() {
bool should_save_return_value = !_needs_return_buffer;
RegSpiller out_reg_spiller(_output_registers);
- int spill_offset = -1;
+ int out_spill_offset = -1;
if (should_save_return_value) {
- spill_offset = frame::native_abi_reg_args_size;
+ out_spill_offset = frame::native_abi_reg_args_size;
// Spill area can be shared with additional out args (>8),
// since it is only used after the call.
int frame_size_including_spill_area = frame::native_abi_reg_args_size + out_reg_spiller.spill_size_bytes();
@@ -170,6 +170,18 @@ void DowncallLinker::StubGenerator::generate() {
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, _abi._scratch1);
+ // Need to spill for state capturing runtime call.
+ // The area spilled into is distinct from the capture state buffer.
+ RegSpiller in_reg_spiller(out_regs);
+ int in_spill_offset = -1;
+ if (_captured_state_mask != 0) {
+ // The spill area cannot be shared with the out_spill since
+ // spilling needs to happen before the call. Allocate a new
+ // region in the stack for this spill space.
+ in_spill_offset = allocated_frame_size;
+ allocated_frame_size += in_reg_spiller.spill_size_bytes();
+ }
+
#ifndef PRODUCT
LogTarget(Trace, foreign, downcall) lt;
if (lt.is_enabled()) {
@@ -211,6 +223,21 @@ void DowncallLinker::StubGenerator::generate() {
arg_shuffle.generate(_masm, as_VMStorage(callerSP), frame::jit_out_preserve_size, frame::native_abi_minframe_size);
__ block_comment("} argument shuffle");
+ if (_captured_state_mask != 0) {
+ assert(in_spill_offset != -1, "must be");
+ __ block_comment("{ load initial thread local");
+ in_reg_spiller.generate_spill(_masm, in_spill_offset);
+
+ // Copy the contents of the capture state buffer into thread local
+ __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_pre), R0);
+ __ ld(R3_ARG1, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER), R1_SP);
+ __ load_const_optimized(R4_ARG2, _captured_state_mask, R0);
+ __ call_c(call_target_address);
+
+ in_reg_spiller.generate_fill(_masm, in_spill_offset);
+ __ block_comment("} load initial thread local");
+ }
+
__ call_c(call_target_address);
if (_needs_return_buffer) {
@@ -247,16 +274,16 @@ void DowncallLinker::StubGenerator::generate() {
__ block_comment("{ save thread local");
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
- __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state), R0);
+ __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_post), R0);
__ ld(R3_ARG1, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER), R1_SP);
__ load_const_optimized(R4_ARG2, _captured_state_mask, R0);
__ call_c(call_target_address);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ block_comment("} save thread local");
@@ -310,7 +337,7 @@ void DowncallLinker::StubGenerator::generate() {
if (should_save_return_value) {
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, JavaThread::check_special_condition_for_native_trans), R0);
@@ -318,7 +345,7 @@ void DowncallLinker::StubGenerator::generate() {
__ call_c(call_target_address);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ b(L_after_safepoint_poll);
@@ -330,14 +357,14 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_reguard);
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, SharedRuntime::reguard_yellow_pages), R0);
__ call_c(call_target_address);
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ b(L_after_reguard);
diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
index bb711f2d053..3c05f950d0c 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -137,10 +137,10 @@ inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address
// Return unique id for this frame. The id must have a value where we
// can distinguish identity and younger/older relationship. null
-// represents an invalid (incomparable) frame.
+// represents an invalid (incomparable) frame. Should not be called for heap frames.
inline intptr_t* frame::id(void) const {
// Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
- return _fp;
+ return real_fp();
}
// Return true if this frame is older (less recent activation) than
@@ -319,6 +319,9 @@ inline frame frame::sender(RegisterMap* map) const {
StackWatermarkSet::on_iteration(map->thread(), result);
}
+ // Calling frame::id() is currently not supported for heap frames.
+ assert(result._on_heap || this->_on_heap || result.is_older(this->id()), "Must be");
+
return result;
}
diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
index 8712c75711d..3692b247989 100644
--- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -179,6 +179,11 @@ void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Re
__ ld(dst, 0, dst); // Resolve (untagged) jobject.
}
+void BarrierSetAssembler::try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Load the oop from the weak handle.
+ __ ld(obj, 0, obj);
+}
+
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Register tmp) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
assert_different_registers(tmp, R0);
diff --git a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp
index 2bf26bd5010..8112542d761 100644
--- a/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/shared/barrierSetAssembler_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, 2022 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,12 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+ // Can be used in nmethods including native wrappers.
+ // Attention: obj will only be valid until next safepoint (no SATB barrier).
+ // TODO: maybe rename to try_peek_weak_handle on all platforms (try: operation may fail, peek: obj is not kept alive)
+ // (other platforms currently use it for C2 only: try_resolve_weak_handle_in_c2)
+ virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
+
virtual void barrier_stubs_init() {}
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
diff --git a/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp
index 7404f7e2e5c..297ce57a394 100644
--- a/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shared/cardTableBarrierSetAssembler_ppc.cpp
@@ -103,8 +103,7 @@ void CardTableBarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Registe
void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr,
Register count, Register preserve) {
- CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set());
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
assert_different_registers(addr, count, R0);
Label Lskip_loop, Lstore_loop;
@@ -117,7 +116,7 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
__ srdi(addr, addr, CardTable::card_shift());
__ srdi(count, count, CardTable::card_shift());
__ subf(count, addr, count);
- __ add_const_optimized(addr, addr, (address)ct->byte_map_base(), R0);
+ __ add_const_optimized(addr, addr, (address)ctbs->card_table_base_const(), R0);
__ addi(count, count, 1);
__ li(R0, 0);
__ mtctr(count);
@@ -140,8 +139,8 @@ void CardTableBarrierSetAssembler::card_table_write(MacroAssembler* masm,
}
void CardTableBarrierSetAssembler::card_write_barrier_post(MacroAssembler* masm, Register store_addr, Register tmp) {
- CardTableBarrierSet* bs = barrier_set_cast(BarrierSet::barrier_set());
- card_table_write(masm, bs->card_table()->byte_map_base(), tmp, store_addr);
+ CardTableBarrierSet* bs = CardTableBarrierSet::barrier_set();
+ card_table_write(masm, bs->card_table_base_const(), tmp, store_addr);
}
void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
index 1f1bc7622ed..8e99d23cc99 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.cpp
@@ -1,6 +1,7 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Red Hat, Inc. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -50,14 +51,14 @@
#define __ masm->
-void ShenandoahBarrierSetAssembler::satb_write_barrier(MacroAssembler *masm,
- Register base, RegisterOrConstant ind_or_offs,
- Register tmp1, Register tmp2, Register tmp3,
- MacroAssembler::PreservationLevel preservation_level) {
+void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler *masm,
+ Register base, RegisterOrConstant ind_or_offs,
+ Register tmp1, Register tmp2, Register tmp3,
+ MacroAssembler::PreservationLevel preservation_level) {
if (ShenandoahSATBBarrier) {
- __ block_comment("satb_write_barrier (shenandoahgc) {");
- satb_write_barrier_impl(masm, 0, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
- __ block_comment("} satb_write_barrier (shenandoahgc)");
+ __ block_comment("satb_barrier (shenandoahgc) {");
+ satb_barrier_impl(masm, 0, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
+ __ block_comment("} satb_barrier (shenandoahgc)");
}
}
@@ -198,11 +199,12 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec
// In "load mode", this register acts as a temporary register and must
// thus not be 'noreg'. In "preloaded mode", its content will be sustained.
// tmp1/tmp2: Temporary registers, one of which must be non-volatile in "preloaded mode".
-void ShenandoahBarrierSetAssembler::satb_write_barrier_impl(MacroAssembler *masm, DecoratorSet decorators,
- Register base, RegisterOrConstant ind_or_offs,
- Register pre_val,
- Register tmp1, Register tmp2,
- MacroAssembler::PreservationLevel preservation_level) {
+void ShenandoahBarrierSetAssembler::satb_barrier_impl(MacroAssembler *masm, DecoratorSet decorators,
+ Register base, RegisterOrConstant ind_or_offs,
+ Register pre_val,
+ Register tmp1, Register tmp2,
+ MacroAssembler::PreservationLevel preservation_level) {
+ assert(ShenandoahSATBBarrier, "Should be checked by caller");
assert_different_registers(tmp1, tmp2, pre_val, noreg);
Label skip_barrier;
@@ -574,13 +576,13 @@ void ShenandoahBarrierSetAssembler::load_at(
if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
if (ShenandoahSATBBarrier) {
__ block_comment("keep_alive_barrier (shenandoahgc) {");
- satb_write_barrier_impl(masm, 0, noreg, noreg, dst, tmp1, tmp2, preservation_level);
+ satb_barrier_impl(masm, 0, noreg, noreg, dst, tmp1, tmp2, preservation_level);
__ block_comment("} keep_alive_barrier (shenandoahgc)");
}
}
}
-void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register base, RegisterOrConstant ind_or_offs, Register tmp) {
+void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register base, RegisterOrConstant ind_or_offs, Register tmp) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
assert_different_registers(base, tmp, R0);
@@ -603,21 +605,33 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler *masm, DecoratorSet
Register base, RegisterOrConstant ind_or_offs, Register val,
Register tmp1, Register tmp2, Register tmp3,
MacroAssembler::PreservationLevel preservation_level) {
- if (is_reference_type(type)) {
- if (ShenandoahSATBBarrier) {
- satb_write_barrier(masm, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
- }
+ // 1: non-reference types require no barriers
+ if (!is_reference_type(type)) {
+ BarrierSetAssembler::store_at(masm, decorators, type,
+ base, ind_or_offs,
+ val,
+ tmp1, tmp2, tmp3,
+ preservation_level);
+ return;
}
+ bool storing_non_null = (val != noreg);
+
+ // 2: pre-barrier: SATB needs the previous value
+ if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
+ satb_barrier(masm, base, ind_or_offs, tmp1, tmp2, tmp3, preservation_level);
+ }
+
+ // Store!
BarrierSetAssembler::store_at(masm, decorators, type,
base, ind_or_offs,
val,
tmp1, tmp2, tmp3,
preservation_level);
- // No need for post barrier if storing null
- if (ShenandoahCardBarrier && is_reference_type(type) && val != noreg) {
- store_check(masm, base, ind_or_offs, tmp1);
+ // 3: post-barrier: card barrier needs store address
+ if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
+ card_barrier(masm, base, ind_or_offs, tmp1);
}
}
@@ -649,6 +663,31 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
__ block_comment("} try_resolve_jobject_in_native (shenandoahgc)");
}
+void ShenandoahBarrierSetAssembler::try_resolve_weak_handle(MacroAssembler *masm, Register obj,
+ Register tmp, Label &slow_path) {
+ __ block_comment("try_resolve_weak_handle (shenandoahgc) {");
+
+ assert_different_registers(obj, tmp);
+
+ Label done;
+
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle(masm, obj, tmp, slow_path);
+
+ // Check if the reference is null, and if it is, take the fast path.
+ __ cmpdi(CR0, obj, 0);
+ __ beq(CR0, done);
+
+ // Check if the heap is under weak-reference/roots processing, in
+ // which case we need to take the slow path.
+ __ lbz(tmp, in_bytes(ShenandoahThreadLocalData::gc_state_offset()), R16_thread);
+ __ andi_(tmp, tmp, ShenandoahHeap::WEAK_ROOTS);
+ __ bne(CR0, slow_path);
+ __ bind(done);
+
+ __ block_comment("} try_resolve_weak_handle (shenandoahgc)");
+}
+
// Special shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation. That is, the CAS operation is intended to succeed in
// the following scenarios (success criteria):
@@ -771,9 +810,6 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler *masm, Register b
void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register preserve) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
-
- ShenandoahBarrierSet* bs = ShenandoahBarrierSet::barrier_set();
- CardTable* ct = bs->card_table();
assert_different_registers(addr, count, R0);
Label L_skip_loop, L_store_loop;
diff --git a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
index b058dcf1a2e..58180c49642 100644
--- a/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/shenandoah/shenandoahBarrierSetAssembler_ppc.hpp
@@ -1,6 +1,7 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
- * Copyright (c) 2012, 2022 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -45,15 +46,15 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
/* ==== Actual barrier implementations ==== */
- void satb_write_barrier_impl(MacroAssembler* masm, DecoratorSet decorators,
- Register base, RegisterOrConstant ind_or_offs,
- Register pre_val,
- Register tmp1, Register tmp2,
- MacroAssembler::PreservationLevel preservation_level);
+ void satb_barrier_impl(MacroAssembler* masm, DecoratorSet decorators,
+ Register base, RegisterOrConstant ind_or_offs,
+ Register pre_val,
+ Register tmp1, Register tmp2,
+ MacroAssembler::PreservationLevel preservation_level);
- void store_check(MacroAssembler* masm,
- Register base, RegisterOrConstant ind_or_offs,
- Register tmp);
+ void card_barrier(MacroAssembler* masm,
+ Register base, RegisterOrConstant ind_or_offs,
+ Register tmp);
void load_reference_barrier_impl(MacroAssembler* masm, DecoratorSet decorators,
Register base, RegisterOrConstant ind_or_offs,
@@ -85,10 +86,10 @@ public:
#endif
/* ==== Available barriers (facades of the actual implementations) ==== */
- void satb_write_barrier(MacroAssembler* masm,
- Register base, RegisterOrConstant ind_or_offs,
- Register tmp1, Register tmp2, Register tmp3,
- MacroAssembler::PreservationLevel preservation_level);
+ void satb_barrier(MacroAssembler* masm,
+ Register base, RegisterOrConstant ind_or_offs,
+ Register tmp1, Register tmp2, Register tmp3,
+ MacroAssembler::PreservationLevel preservation_level);
void load_reference_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register base, RegisterOrConstant ind_or_offs,
@@ -121,6 +122,8 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+
+ virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
};
#endif // CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP
diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
index 0aa5858c8e6..3e74dfb88cb 100644
--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2021, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -627,6 +627,19 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, R
__ block_comment("} try_resolve_jobject_in_native (zgc)");
}
+void ZBarrierSetAssembler::try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle(masm, obj, tmp, slow_path);
+
+ // Check if the oop is bad, in which case we need to take the slow path.
+ __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadMask);
+ __ andi_(R0, obj, barrier_Relocation::unpatched);
+ __ bne(CR0, slow_path);
+
+ // Oop is okay, so we uncolor it.
+ __ srdi(obj, obj, ZPointerLoadShift);
+}
+
#undef __
#ifdef COMPILER1
diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp
index 27203e7b01c..655184cf6a3 100644
--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2021, 2022 SAP SE. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,8 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+ virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
+
virtual void check_oop(MacroAssembler *masm, Register obj, const char* msg);
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_instruction_and_data_patch; }
diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp
index 41a8e821ada..927a8cc2be3 100644
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -60,7 +60,7 @@ define_pd_global(bool, VMContinuations, true);
// Use large code-entry alignment.
define_pd_global(size_t, CodeCacheSegmentSize, 128);
-define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(uint, CodeEntryAlignment, 64);
define_pd_global(intx, OptoLoopAlignment, 16);
define_pd_global(intx, InlineSmallCode, 1500);
diff --git a/src/hotspot/cpu/ppc/icache_ppc.cpp b/src/hotspot/cpu/ppc/icache_ppc.cpp
index 05ad3c7a30d..f3d51bad18c 100644
--- a/src/hotspot/cpu/ppc/icache_ppc.cpp
+++ b/src/hotspot/cpu/ppc/icache_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
*/
#include "runtime/icache.hpp"
+#include "runtime/vm_version.hpp"
// Use inline assembler to implement icache flush.
int ICache::ppc64_flush_icache(address start, int lines, int magic) {
@@ -67,6 +68,9 @@ int ICache::ppc64_flush_icache(address start, int lines, int magic) {
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+ guarantee(VM_Version::get_icache_line_size() >= ICache::line_size,
+ "processors with smaller cache line size are no longer supported");
+
*flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
// First call to flush itself.
diff --git a/src/hotspot/cpu/ppc/icache_ppc.hpp b/src/hotspot/cpu/ppc/icache_ppc.hpp
index d348cad1c72..024f706182a 100644
--- a/src/hotspot/cpu/ppc/icache_ppc.hpp
+++ b/src/hotspot/cpu/ppc/icache_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2013 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,9 +35,8 @@ class ICache : public AbstractICache {
public:
enum {
- // Actually, cache line size is 64, but keeping it as it is to be
- // on the safe side on ALL PPC64 implementations.
- log2_line_size = 5,
+ // Cache line size is 128 on all supported PPC64 implementations.
+ log2_line_size = 7,
line_size = 1 << log2_line_size
};
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index 4ea33ebaf63..275ff92c699 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -258,7 +258,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_not_taken_branch(Register scratch1, Register scratch2);
void profile_call(Register scratch1, Register scratch2);
void profile_final_call(Register scratch1, Register scratch2);
- void profile_virtual_call(Register Rreceiver, Register Rscratch1, Register Rscratch2, bool receiver_can_be_null);
+ void profile_virtual_call(Register Rreceiver, Register Rscratch1, Register Rscratch2);
void profile_typecheck(Register Rklass, Register Rscratch1, Register Rscratch2);
void profile_ret(TosState state, Register return_bci, Register scratch1, Register scratch2);
void profile_switch_default(Register scratch1, Register scratch2);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index f7bf457f72c..56eade8e533 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -1340,28 +1340,15 @@ void InterpreterMacroAssembler::profile_final_call(Register scratch1, Register s
// Count a virtual call in the bytecodes.
void InterpreterMacroAssembler::profile_virtual_call(Register Rreceiver,
Register Rscratch1,
- Register Rscratch2,
- bool receiver_can_be_null) {
+ Register Rscratch2) {
if (!ProfileInterpreter) { return; }
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(profile_continue);
- Label skip_receiver_profile;
- if (receiver_can_be_null) {
- Label not_null;
- cmpdi(CR0, Rreceiver, 0);
- bne(CR0, not_null);
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(in_bytes(CounterData::count_offset()), Rscratch1, Rscratch2);
- b(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
record_klass_in_profile(Rreceiver, Rscratch1, Rscratch2);
- bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 809285afddb..5fbcce94029 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
+#include "runtime/objectMonitorTable.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
@@ -482,7 +483,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
// variant 3, far cond branch to the next instruction, already patched to nops:
//
// nop
- // endgroup
+ // nop
// SKIP/DEST:
//
return;
@@ -499,7 +500,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
// Far branch to next instruction: Optimize it by patching nops (produce variant 3).
masm.nop();
- masm.endgroup();
+ masm.nop();
} else {
if (is_bc_far_variant1_at(instruction_addr)) {
// variant 1, the 1st instruction contains the destination address:
@@ -2756,39 +2757,54 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
addi(owner_addr, mark, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
mark = noreg;
} else {
+ const Register tmp3_bucket = tmp3;
+ const Register tmp2_hash = tmp2;
Label monitor_found;
- Register cache_addr = tmp2;
- // Load cache address
- addi(cache_addr, R16_thread, in_bytes(JavaThread::om_cache_oops_offset()));
+ // Save the mark, we might need it to extract the hash.
+ mr(tmp2_hash, mark);
- const int num_unrolled = 2;
+ // Look for the monitor in the om_cache.
+
+ ByteSize cache_offset = JavaThread::om_cache_oops_offset();
+ ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
+ const int num_unrolled = OMCache::CAPACITY;
for (int i = 0; i < num_unrolled; i++) {
- ld(R0, 0, cache_addr);
+ ld(R0, in_bytes(cache_offset), R16_thread);
+ ld(monitor, in_bytes(cache_offset + monitor_offset), R16_thread);
cmpd(CR0, R0, obj);
beq(CR0, monitor_found);
- addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
+ cache_offset = cache_offset + OMCache::oop_to_oop_difference();
}
- Label loop;
+ // Look for the monitor in the table.
- // Search for obj in cache.
- bind(loop);
+ // Get the hash code.
+ srdi(tmp2_hash, tmp2_hash, markWord::hash_shift);
- // Check for match.
- ld(R0, 0, cache_addr);
- cmpd(CR0, R0, obj);
- beq(CR0, monitor_found);
+ // Get the table and calculate the bucket's address
+ int simm16_rest = load_const_optimized(tmp3, ObjectMonitorTable::current_table_address(), R0, true);
+ ld_ptr(tmp3, simm16_rest, tmp3);
+ ld(tmp1, in_bytes(ObjectMonitorTable::table_capacity_mask_offset()), tmp3);
+ andr(tmp2_hash, tmp2_hash, tmp1);
+ ld(tmp3_bucket, in_bytes(ObjectMonitorTable::table_buckets_offset()), tmp3);
- // Search until null encountered, guaranteed _null_sentinel at end.
- addi(cache_addr, cache_addr, in_bytes(OMCache::oop_to_oop_difference()));
- cmpdi(CR1, R0, 0);
- bne(CR1, loop);
- // Cache Miss, CR0.NE set from cmp above
- b(slow_path);
+ // Read the monitor from the bucket.
+ sldi(tmp2_hash, tmp2_hash, LogBytesPerWord);
+ ldx(monitor, tmp3_bucket, tmp2_hash);
+
+ // Check if the monitor in the bucket is special (empty, tombstone or removed).
+ cmpldi(CR0, monitor, ObjectMonitorTable::SpecialPointerValues::below_is_special);
+ blt(CR0, slow_path);
+
+ // Check if object matches.
+ ld(tmp3, in_bytes(ObjectMonitor::object_offset()), monitor);
+ BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs_asm->try_resolve_weak_handle(this, tmp3, tmp2, slow_path);
+ cmpd(CR0, tmp3, obj);
+ bne(CR0, slow_path);
bind(monitor_found);
- ld(monitor, in_bytes(OMCache::oop_to_monitor_difference()), cache_addr);
// Compute owner address.
addi(owner_addr, monitor, in_bytes(ObjectMonitor::owner_offset()));
@@ -3185,23 +3201,17 @@ Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- Register compressedKlass = encode_klass_not_null(ck, klass);
- stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
- } else {
- std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
- }
+ Register compressedKlass = encode_klass_not_null(ck, klass);
+ stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
}
void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- if (val == noreg) {
- val = R0;
- li(val, 0);
- }
- stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop);
+ if (val == noreg) {
+ val = R0;
+ li(val, 0);
}
+ stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop);
}
int MacroAssembler::instr_size_for_decode_klass_not_null() {
@@ -3210,17 +3220,13 @@ int MacroAssembler::instr_size_for_decode_klass_not_null() {
// Not yet computed?
if (computed_size == -1) {
- if (!UseCompressedClassPointers) {
- computed_size = 0;
- } else {
- // Determine by scratch emit.
- ResourceMark rm;
- int code_size = 8 * BytesPerInstWord;
- CodeBuffer cb("decode_klass_not_null scratch buffer", code_size, 0);
- MacroAssembler* a = new MacroAssembler(&cb);
- a->decode_klass_not_null(R11_scratch1);
- computed_size = a->offset();
- }
+ // Determine by scratch emit.
+ ResourceMark rm;
+ int code_size = 8 * BytesPerInstWord;
+ CodeBuffer cb("decode_klass_not_null scratch buffer", code_size, 0);
+ MacroAssembler* a = new MacroAssembler(&cb);
+ a->decode_klass_not_null(R11_scratch1);
+ computed_size = a->offset();
}
return computed_size;
@@ -3243,18 +3249,14 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
void MacroAssembler::load_klass_no_decode(Register dst, Register src) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(dst, src);
- } else if (UseCompressedClassPointers) {
- lwz(dst, oopDesc::klass_offset_in_bytes(), src);
} else {
- ld(dst, oopDesc::klass_offset_in_bytes(), src);
+ lwz(dst, oopDesc::klass_offset_in_bytes(), src);
}
}
void MacroAssembler::load_klass(Register dst, Register src) {
load_klass_no_decode(dst, src);
- if (UseCompressedClassPointers) { // also true for UseCompactObjectHeaders
- decode_klass_not_null(dst);
- }
+ decode_klass_not_null(dst);
}
// Loads the obj's Klass* into dst.
@@ -3270,18 +3272,13 @@ void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) {
void MacroAssembler::cmp_klass(ConditionRegister dst, Register obj, Register klass, Register tmp, Register tmp2) {
assert_different_registers(obj, klass, tmp);
- if (UseCompressedClassPointers) {
- if (UseCompactObjectHeaders) {
- load_narrow_klass_compact(tmp, obj);
- } else {
- lwz(tmp, oopDesc::klass_offset_in_bytes(), obj);
- }
- Register encoded_klass = encode_klass_not_null(tmp2, klass);
- cmpw(dst, tmp, encoded_klass);
+ if (UseCompactObjectHeaders) {
+ load_narrow_klass_compact(tmp, obj);
} else {
- ld(tmp, oopDesc::klass_offset_in_bytes(), obj);
- cmpd(dst, tmp, klass);
+ lwz(tmp, oopDesc::klass_offset_in_bytes(), obj);
}
+ Register encoded_klass = encode_klass_not_null(tmp2, klass);
+ cmpw(dst, tmp, encoded_klass);
}
void MacroAssembler::cmp_klasses_from_objects(ConditionRegister dst, Register obj1, Register obj2, Register tmp1, Register tmp2) {
@@ -3289,14 +3286,10 @@ void MacroAssembler::cmp_klasses_from_objects(ConditionRegister dst, Register ob
load_narrow_klass_compact(tmp1, obj1);
load_narrow_klass_compact(tmp2, obj2);
cmpw(dst, tmp1, tmp2);
- } else if (UseCompressedClassPointers) {
+ } else {
lwz(tmp1, oopDesc::klass_offset_in_bytes(), obj1);
lwz(tmp2, oopDesc::klass_offset_in_bytes(), obj2);
cmpw(dst, tmp1, tmp2);
- } else {
- ld(tmp1, oopDesc::klass_offset_in_bytes(), obj1);
- ld(tmp2, oopDesc::klass_offset_in_bytes(), obj2);
- cmpd(dst, tmp1, tmp2);
}
}
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index 875602cae58..4be62098bdf 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -70,14 +70,6 @@ class MacroAssembler: public Assembler {
// Move register if destination register and target register are different
inline void mr_if_needed(Register rd, Register rs, bool allow_invalid = false);
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
- // This is dedicated for emitting scheduled mach nodes. For better
- // readability of the ad file I put it here.
- // Endgroups are not needed if
- // - the scheduler is off
- // - the scheduler found that there is a natural group end, in that
- // case it reduced the size of the instruction used in the test
- // yielding 'needed'.
- inline void endgroup_if_needed(bool needed);
// Memory barriers.
inline void membar(int bits);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp
index 2b19d84c69c..cdeb8527bea 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -72,11 +72,6 @@ inline void MacroAssembler::mr_if_needed(Register rd, Register rs, bool allow_no
inline void MacroAssembler::fmr_if_needed(FloatRegister rd, FloatRegister rs) {
if (rs != rd) fmr(rd, rs);
}
-inline void MacroAssembler::endgroup_if_needed(bool needed) {
- if (needed) {
- endgroup();
- }
-}
inline void MacroAssembler::membar(int bits) {
// Comment: Usage of elemental_membar(bits) is not recommended for Power 8.
@@ -240,13 +235,13 @@ inline bool MacroAssembler::is_bc_far_variant3_at(address instruction_addr) {
// Variant 3, far cond branch to the next instruction, already patched to nops:
//
// nop
- // endgroup
+ // nop
// SKIP/DEST:
//
const int instruction_1 = *(int*)(instruction_addr);
const int instruction_2 = *(int*)(instruction_addr + 4);
return is_nop(instruction_1) &&
- is_endgroup(instruction_2);
+ is_nop(instruction_2);
}
// set dst to -1, 0, +1 as follows: if CR0bi is "greater than", dst is set to 1,
diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp
index aad41fb7b1c..cbe882648b8 100644
--- a/src/hotspot/cpu/ppc/matcher_ppc.hpp
+++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -64,12 +65,10 @@
return true;
}
- // Use conditional move (CMOVL) on Power7.
static constexpr int long_cmove_cost() { return 0; } // this only makes long cmoves more expensive than int cmoves
- // Suppress CMOVF. Conditional move available (sort of) on PPC64 only from P7 onwards. Not exploited yet.
- // fsel doesn't accept a condition register as input, so this would be slightly different.
- static int float_cmove_cost() { return ConditionalMoveLimit; }
+ // Suppress CMOVF for Power8 because there are no fast nodes.
+ static int float_cmove_cost() { return (PowerArchitecturePPC64 >= 9) ? 0 : ConditionalMoveLimit; }
// This affects two different things:
// - how Decode nodes are matched
@@ -88,7 +87,6 @@
static bool narrow_klass_use_complex_address() {
NOT_LP64(ShouldNotCallThis());
- assert(UseCompressedClassPointers, "only for compressed klass code");
// TODO: PPC port if (MatchDecodeNodes) return true;
return false;
}
diff --git a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp
index 803bb6bfe69..ae94a9618b5 100644
--- a/src/hotspot/cpu/ppc/methodHandles_ppc.cpp
+++ b/src/hotspot/cpu/ppc/methodHandles_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -49,11 +49,6 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
-inline static RegisterOrConstant constant(int value) {
- return RegisterOrConstant(value);
-}
-
void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg,
Register temp_reg, Register temp2_reg) {
if (VerifyMethodHandles) {
@@ -109,14 +104,13 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Registe
__ andi(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
__ cmpwi(CR1, temp, ref_kind);
__ beq(CR1, L);
- { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
- jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
- if (ref_kind == JVM_REF_invokeVirtual ||
- ref_kind == JVM_REF_invokeSpecial)
- // could do this for all ref_kinds, but would explode assembly code size
- trace_method_handle(_masm, buf);
- __ stop(buf);
+ const char* msg = ref_kind_to_verify_msg(ref_kind);
+ if (ref_kind == JVM_REF_invokeVirtual ||
+ ref_kind == JVM_REF_invokeSpecial) {
+ // could do this for all ref_kinds, but would explode assembly code size
+ trace_method_handle(_masm, msg);
}
+ __ stop(msg);
BLOCK_COMMENT("} verify_ref_kind");
__ BIND(L);
}
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 2a0a9149bb3..f3d33b4305d 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2012, 2026 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -2234,6 +2234,12 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_FmaVD:
return (SuperwordUseVSX && UseFMA);
+ case Op_MinF:
+ case Op_MaxF:
+ case Op_MinD:
+ case Op_MaxD:
+ return (PowerArchitecturePPC64 >= 9);
+
case Op_Digit:
return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
case Op_LowerCase:
@@ -2406,10 +2412,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
return false;
}
-// Return whether or not this register is ever used as an argument. This
-// function is used on startup to build the trampoline stubs in generateOptoStub.
-// Registers not mentioned will be killed by the VM call in the trampoline, and
-// arguments in those registers not be available to the callee.
+#ifdef ASSERT
+// Return whether or not this register is ever used as an argument.
bool Matcher::can_be_java_arg(int reg) {
// We must include the virtual halves in order to get STDs and LDs
// instead of STWs and LWs in the trampoline stubs.
@@ -2441,10 +2445,7 @@ bool Matcher::can_be_java_arg(int reg) {
return false;
}
-
-bool Matcher::is_spillable_arg(int reg) {
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -2456,10 +2457,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? 28 : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
- return false;
-}
-
// Register for DIVI projection of divmodI.
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
@@ -3024,7 +3021,6 @@ encode %{
%}
enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
- // use isel instruction with Power 7
cmpP_reg_imm16Node *n_compare = new cmpP_reg_imm16Node();
encodeP_subNode *n_sub_base = new encodeP_subNode();
encodeP_shiftNode *n_shift = new encodeP_shiftNode();
@@ -3099,7 +3095,6 @@ encode %{
n_shift->_opnds[1] = op_src;
n_shift->_bottom_type = _bottom_type;
- // use isel instruction with Power 7
decodeN_addNode *n_add_base = new decodeN_addNode();
n_add_base->add_req(n_region, n_shift);
n_add_base->_opnds[0] = op_dst;
@@ -3711,13 +3706,6 @@ frame %{
// Compiled code's Frame Pointer.
frame_pointer(R1); // R1_SP
- // Interpreter stores its frame pointer in a register which is
- // stored to the stack by I2CAdaptors. I2CAdaptors convert from
- // interpreted java to compiled java.
- //
- // R14_state holds pointer to caller's cInterpreter.
- interpreter_frame_pointer(R14); // R14_state
-
stack_alignment(frame::alignment_in_bytes);
// Number of outgoing stack slots killed above the
@@ -6335,36 +6323,8 @@ instruct loadConD_Ex(regD dst, immD src) %{
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
-// Special prefetch versions which use the dcbz instruction.
-instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
- match(PrefetchAllocation (AddP mem src));
- predicate(AllocatePrefetchStyle == 3);
- ins_cost(MEMORY_REF_COST);
-
- format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
- size(4);
- ins_encode %{
- __ dcbz($src$$Register, $mem$$base$$Register);
- %}
- ins_pipe(pipe_class_memory);
-%}
-
-instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
- match(PrefetchAllocation mem);
- predicate(AllocatePrefetchStyle == 3);
- ins_cost(MEMORY_REF_COST);
-
- format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
- size(4);
- ins_encode %{
- __ dcbz($mem$$base$$Register);
- %}
- ins_pipe(pipe_class_memory);
-%}
-
instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
match(PrefetchAllocation (AddP mem src));
- predicate(AllocatePrefetchStyle != 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
@@ -6377,7 +6337,6 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
instruct prefetch_alloc_no_offset(indirectMemory mem) %{
match(PrefetchAllocation mem);
- predicate(AllocatePrefetchStyle != 3);
ins_cost(MEMORY_REF_COST);
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
@@ -6618,7 +6577,6 @@ instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
ins_pipe(pipe_class_default);
%}
-// Power 7 can use isel instruction
instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
// The match rule is needed to make it a 'MachTypeNode'!
match(Set dst (EncodeP (Binary crx src1)));
@@ -7172,6 +7130,18 @@ instruct membar_release_lock() %{
ins_pipe(pipe_class_default);
%}
+instruct membar_storeload() %{
+ match(MemBarStoreLoad);
+ ins_cost(4*MEMORY_REF_COST);
+
+ format %{ "MEMBAR-store-load" %}
+ size(4);
+ ins_encode %{
+ __ fence();
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct membar_volatile() %{
match(MemBarVolatile);
ins_cost(4*MEMORY_REF_COST);
@@ -7214,6 +7184,18 @@ instruct membar_volatile() %{
// ins_pipe(pipe_class_default);
//%}
+instruct membar_full() %{
+ match(MemBarFull);
+ ins_cost(4*MEMORY_REF_COST);
+
+ format %{ "MEMBAR-full" %}
+ size(4);
+ ins_encode %{
+ __ fence();
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct membar_CPUOrder() %{
match(MemBarCPUOrder);
ins_cost(0);
@@ -7293,7 +7275,6 @@ instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
ins_variable_size_depending_on_alignment(true);
format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
- // Worst case is branch + move + stop, no stop without scheduler.
size(8);
ins_encode %{
Label done;
@@ -7313,7 +7294,6 @@ instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
ins_variable_size_depending_on_alignment(true);
format %{ "CMOVEF $cmp, $crx, $dst, $src\n\t" %}
- // Worst case is branch + move + stop, no stop without scheduler.
size(8);
ins_encode %{
Label done;
@@ -7326,6 +7306,70 @@ instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
ins_pipe(pipe_class_default);
%}
+instruct cmovF_cmpF(cmpOp cop, regF op1, regF op2, regF dst, regF false_result, regF true_result, regD tmp) %{
+ match(Set dst (CMoveF (Binary cop (CmpF op1 op2)) (Binary false_result true_result)));
+ predicate(PowerArchitecturePPC64 >= 9);
+ effect(TEMP tmp);
+ ins_cost(2*DEFAULT_COST);
+ format %{ "cmovF_cmpF $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
+ size(8);
+ ins_encode %{
+ __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
+ $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
+ $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
+ $tmp$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct cmovF_cmpD(cmpOp cop, regD op1, regD op2, regF dst, regF false_result, regF true_result, regD tmp) %{
+ match(Set dst (CMoveF (Binary cop (CmpD op1 op2)) (Binary false_result true_result)));
+ predicate(PowerArchitecturePPC64 >= 9);
+ effect(TEMP tmp);
+ ins_cost(2*DEFAULT_COST);
+ format %{ "cmovF_cmpD $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
+ size(8);
+ ins_encode %{
+ __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
+ $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
+ $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
+ $tmp$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct cmovD_cmpD(cmpOp cop, regD op1, regD op2, regD dst, regD false_result, regD true_result, regD tmp) %{
+ match(Set dst (CMoveD (Binary cop (CmpD op1 op2)) (Binary false_result true_result)));
+ predicate(PowerArchitecturePPC64 >= 9);
+ effect(TEMP tmp);
+ ins_cost(2*DEFAULT_COST);
+ format %{ "cmovD_cmpD $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
+ size(8);
+ ins_encode %{
+ __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
+ $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
+ $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
+ $tmp$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct cmovD_cmpF(cmpOp cop, regF op1, regF op2, regD dst, regD false_result, regD true_result, regD tmp) %{
+ match(Set dst (CMoveD (Binary cop (CmpF op1 op2)) (Binary false_result true_result)));
+ predicate(PowerArchitecturePPC64 >= 9);
+ effect(TEMP tmp);
+ ins_cost(2*DEFAULT_COST);
+ format %{ "cmovD_cmpF $dst = ($op1 $cop $op2) ? $true_result : $false_result\n\t" %}
+ size(8);
+ ins_encode %{
+ __ cmovF($cop$$cmpcode, $dst$$FloatRegister->to_vsr(),
+ $op1$$FloatRegister->to_vsr(), $op2$$FloatRegister->to_vsr(),
+ $true_result$$FloatRegister->to_vsr(), $false_result$$FloatRegister->to_vsr(),
+ $tmp$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
//----------Compare-And-Swap---------------------------------------------------
// CompareAndSwap{P,I,L} have more than one output, therefore "CmpI
@@ -8492,7 +8536,6 @@ instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
ins_variable_size_depending_on_alignment(true);
format %{ "CMOVE $dst, neg($src1), $crx" %}
- // Worst case is branch + move + stop, no stop without scheduler.
size(8);
ins_encode %{
Label done;
@@ -8551,7 +8594,6 @@ instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
ins_variable_size_depending_on_alignment(true);
format %{ "CMOVE $dst, neg($src1), $crx" %}
- // Worst case is branch + move + stop, no stop without scheduler.
size(8);
ins_encode %{
Label done;
@@ -10261,8 +10303,7 @@ instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
ins_variable_size_depending_on_alignment(true);
- format %{ "cmovI $crx, $dst, $src" %}
- // Worst case is branch + move + stop, no stop without scheduler.
+ format %{ "CMOVI $crx, $dst, $src" %}
size(8);
ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
ins_pipe(pipe_class_default);
@@ -10275,8 +10316,7 @@ instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
ins_variable_size_depending_on_alignment(true);
- format %{ "cmovI $crx, $dst, $src" %}
- // Worst case is branch + move + stop, no stop without scheduler.
+ format %{ "CMOVI $crx, $dst, $src" %}
size(8);
ins_encode( enc_cmove_bso_reg(dst, crx, src) );
ins_pipe(pipe_class_default);
@@ -10288,7 +10328,7 @@ instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
effect(DEF dst, USE crx, USE src);
predicate(false);
- format %{ "CmovI $dst, $crx, $src \t// postalloc expanded" %}
+ format %{ "CMOVI $dst, $crx, $src \t// postalloc expanded" %}
postalloc_expand %{
//
// replaces
@@ -10438,8 +10478,7 @@ instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
ins_variable_size_depending_on_alignment(true);
- format %{ "cmovL $crx, $dst, $src" %}
- // Worst case is branch + move + stop, no stop without scheduler.
+ format %{ "CMOVL $crx, $dst, $src" %}
size(8);
ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
ins_pipe(pipe_class_default);
@@ -10452,8 +10491,7 @@ instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
ins_variable_size_depending_on_alignment(true);
- format %{ "cmovL $crx, $dst, $src" %}
- // Worst case is branch + move + stop, no stop without scheduler.
+ format %{ "CMOVL $crx, $dst, $src" %}
size(8);
ins_encode( enc_cmove_bso_reg(dst, crx, src) );
ins_pipe(pipe_class_default);
@@ -10465,7 +10503,7 @@ instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
effect(DEF dst, USE crx, USE src);
predicate(false);
- format %{ "CmovL $dst, $crx, $src \t// postalloc expanded" %}
+ format %{ "CMOVL $dst, $crx, $src \t// postalloc expanded" %}
postalloc_expand %{
//
// replaces
@@ -10666,9 +10704,9 @@ instruct convF2HF_reg_reg(iRegIdst dst, regF src, regF tmp) %{
effect(TEMP tmp);
ins_cost(3 * DEFAULT_COST);
size(12);
- format %{ "xscvdphp $tmp, $src\t# convert to half precision\n\t"
- "mffprd $dst, $tmp\t# move result from $tmp to $dst\n\t"
- "extsh $dst, $dst\t# make it a proper short"
+ format %{ "XSCVDPHP $tmp, $src\t# convert to half precision\n\t"
+ "MFFPRD $dst, $tmp\t# move result from $tmp to $dst\n\t"
+ "EXTSH $dst, $dst\t# make it a proper short"
%}
ins_encode %{
__ f2hf($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister);
@@ -10680,8 +10718,8 @@ instruct convHF2F_reg_reg(regF dst, iRegIsrc src) %{
match(Set dst (ConvHF2F src));
ins_cost(2 * DEFAULT_COST);
size(8);
- format %{ "mtfprd $dst, $src\t# move source from $src to $dst\n\t"
- "xscvhpdp $dst, $dst\t# convert from half precision"
+ format %{ "MTFPRD $dst, $src\t# move source from $src to $dst\n\t"
+ "XSCVHPDP $dst, $dst\t# convert from half precision"
%}
ins_encode %{
__ hf2f($dst$$FloatRegister, $src$$Register);
@@ -11079,8 +11117,7 @@ instruct cmov_bns_less(flagsReg crx) %{
ins_variable_size_depending_on_alignment(true);
- format %{ "cmov $crx" %}
- // Worst case is branch + move + stop, no stop without scheduler.
+ format %{ "CMOV $crx" %}
size(12);
ins_encode %{
Label done;
@@ -11108,7 +11145,7 @@ instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
match(Set crx (CmpF src1 src2));
ins_cost(DEFAULT_COST+BRANCH_COST);
- format %{ "CmpF $crx, $src1, $src2 \t// postalloc expanded" %}
+ format %{ "CMPF $crx, $src1, $src2 \t// postalloc expanded" %}
postalloc_expand %{
//
// replaces
@@ -12261,6 +12298,58 @@ instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegC
ins_pipe(pipe_class_default);
%}
+instruct minF(regF dst, regF src1, regF src2) %{
+ match(Set dst (MinF src1 src2));
+ predicate(PowerArchitecturePPC64 >= 9);
+ ins_cost(DEFAULT_COST);
+
+ format %{ "XSMINJDP $dst, $src1, $src2\t// MinF" %}
+ size(4);
+ ins_encode %{
+ __ xsminjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct minD(regD dst, regD src1, regD src2) %{
+ match(Set dst (MinD src1 src2));
+ predicate(PowerArchitecturePPC64 >= 9);
+ ins_cost(DEFAULT_COST);
+
+ format %{ "XSMINJDP $dst, $src1, $src2\t// MinD" %}
+ size(4);
+ ins_encode %{
+ __ xsminjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct maxF(regF dst, regF src1, regF src2) %{
+ match(Set dst (MaxF src1 src2));
+ predicate(PowerArchitecturePPC64 >= 9);
+ ins_cost(DEFAULT_COST);
+
+ format %{ "XSMAXJDP $dst, $src1, $src2\t// MaxF" %}
+ size(4);
+ ins_encode %{
+ __ xsmaxjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct maxD(regD dst, regD src1, regD src2) %{
+ match(Set dst (MaxD src1 src2));
+ predicate(PowerArchitecturePPC64 >= 9);
+ ins_cost(DEFAULT_COST);
+
+ format %{ "XSMAXJDP $dst, $src1, $src2\t// MaxD" %}
+ size(4);
+ ins_encode %{
+ __ xsmaxjdp($dst$$FloatRegister->to_vsr(), $src1$$FloatRegister->to_vsr(), $src2$$FloatRegister->to_vsr());
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
//---------- Population Count Instructions ------------------------------------
instruct popCountI(iRegIdst dst, iRegIsrc src) %{
@@ -13783,7 +13872,7 @@ instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
match(Set cr0 (OverflowAddL op1 op2));
- format %{ "add_ $op1, $op2\t# overflow check long" %}
+ format %{ "ADD_ $op1, $op2\t# overflow check long" %}
size(12);
ins_encode %{
__ li(R0, 0);
@@ -13796,7 +13885,7 @@ instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
match(Set cr0 (OverflowSubL op1 op2));
- format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %}
+ format %{ "SUBFO_ R0, $op2, $op1\t# overflow check long" %}
size(12);
ins_encode %{
__ li(R0, 0);
@@ -13809,7 +13898,7 @@ instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
match(Set cr0 (OverflowSubL zero op2));
- format %{ "nego_ R0, $op2\t# overflow check long" %}
+ format %{ "NEGO_ R0, $op2\t# overflow check long" %}
size(12);
ins_encode %{
__ li(R0, 0);
@@ -13822,7 +13911,7 @@ instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
match(Set cr0 (OverflowMulL op1 op2));
- format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
+ format %{ "MULLDO_ R0, $op1, $op2\t# overflow check long" %}
size(12);
ins_encode %{
__ li(R0, 0);
@@ -14199,7 +14288,7 @@ instruct ForwardExceptionjmp()
match(ForwardException);
ins_cost(CALL_COST);
- format %{ "Jmp forward_exception_stub" %}
+ format %{ "JMP forward_exception_stub" %}
ins_encode %{
__ set_inst_mark();
__ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
@@ -14227,7 +14316,7 @@ instruct RethrowException() %{
match(Rethrow);
ins_cost(CALL_COST);
- format %{ "Jmp rethrow_stub" %}
+ format %{ "JMP rethrow_stub" %}
ins_encode %{
__ set_inst_mark();
__ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
@@ -14269,20 +14358,6 @@ instruct tlsLoadP(threadRegP dst) %{
//---Some PPC specific nodes---------------------------------------------------
-// Stop a group.
-instruct endGroup() %{
- ins_cost(0);
-
- ins_is_nop(true);
-
- format %{ "End Bundle (ori r1, r1, 0)" %}
- size(4);
- ins_encode %{
- __ endgroup();
- %}
- ins_pipe(pipe_class_default);
-%}
-
// Nop instructions
instruct fxNop() %{
diff --git a/src/hotspot/cpu/ppc/registerMap_ppc.cpp b/src/hotspot/cpu/ppc/registerMap_ppc.cpp
new file mode 100644
index 00000000000..2e7f8af89d3
--- /dev/null
+++ b/src/hotspot/cpu/ppc/registerMap_ppc.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "runtime/registerMap.hpp"
+
+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const {
+ if (base_reg->is_VectorRegister()) {
+ // Not all physical slots belonging to a VectorRegister have corresponding
+ // valid VMReg locations in the RegisterMap.
+ // (See RegisterSaver::push_frame_reg_args_and_save_live_registers.)
+ // However, the slots are always saved to the stack in a contiguous region
+ // of memory so we can calculate the address of the upper slots by
+ // offsetting from the base address.
+ assert(base_reg->is_concrete(), "must pass base reg");
+ address base_location = location(base_reg, nullptr);
+ if (base_location != nullptr) {
+ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size;
+ return base_location + offset_in_bytes;
+ } else {
+ return nullptr;
+ }
+ } else {
+ return location(base_reg->next(slot_idx), nullptr);
+ }
+}
diff --git a/src/hotspot/cpu/ppc/registerMap_ppc.hpp b/src/hotspot/cpu/ppc/registerMap_ppc.hpp
index 01eb642107c..607c712d10f 100644
--- a/src/hotspot/cpu/ppc/registerMap_ppc.hpp
+++ b/src/hotspot/cpu/ppc/registerMap_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2013 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,9 +35,7 @@
// Since there is none, we just return null.
address pd_location(VMReg reg) const { return nullptr; }
- address pd_location(VMReg base_reg, int slot_idx) const {
- return location(base_reg->next(slot_idx), nullptr);
- }
+ address pd_location(VMReg base_reg, int slot_idx) const;
// no PD state to clear or copy:
void pd_clear() {}
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 4eb2028f529..53644210415 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -102,7 +102,7 @@ class RegisterSaver {
// During deoptimization only the result registers need to be restored
// all the other values have already been extracted.
- static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes);
+ static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors);
// Constants and data structures:
@@ -349,7 +349,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
}
// Note that generate_oop_map in the following loop is only used for the
- // polling_page_vectors_safepoint_handler_blob.
+ // polling_page_vectors_safepoint_handler_blob and the deopt_blob.
// The order in which the vector contents are stored depends on Endianess and
// the utilized instructions (PowerArchitecturePPC64).
assert(is_aligned(offset, StackAlignmentInBytes), "should be");
@@ -361,6 +361,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
__ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
// Note: The contents were read in the same order (see loadV16_Power9 node in ppc.ad).
+ // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
if (generate_oop_map) {
map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
@@ -380,6 +381,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
__ stxvd2x(as_VectorRegister(reg_num)->to_vsr(), R31, R1_SP);
}
// Note: The contents were read in the same order (see loadV16_Power8 / loadV16_Power9 node in ppc.ad).
+ // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
if (generate_oop_map) {
VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr);
@@ -566,10 +568,14 @@ void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm
}
// Restore the registers that might be holding a result.
-void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) {
+void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors) {
const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
sizeof(RegisterSaver::LiveRegType);
- const int register_save_size = regstosave_num * reg_size; // VS registers not relevant here.
+ const int vecregstosave_num = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
+ sizeof(RegisterSaver::LiveRegType))
+ : 0;
+ const int register_save_size = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
+
const int register_save_offset = frame_size_in_bytes - register_save_size;
// restore all result registers (ints and floats)
@@ -598,7 +604,7 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
offset += reg_size;
}
- assert(offset == frame_size_in_bytes, "consistency check");
+ assert(offset == frame_size_in_bytes - (save_vectors ? vecregstosave_num * vec_reg_size : 0), "consistency check");
}
// Is vector's size (in bytes) bigger than a size saved by default?
@@ -775,7 +781,6 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
return stk;
}
-#if defined(COMPILER1) || defined(COMPILER2)
// Calling convention for calling C code.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
@@ -913,7 +918,6 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
return MAX2(arg, 8) * 2 + additional_frame_header_slots;
#endif
}
-#endif // COMPILER2
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
uint num_bits,
@@ -2874,7 +2878,6 @@ void SharedRuntime::generate_deopt_blob() {
CodeBuffer buffer(name, 2048, 1024);
InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
Label exec_mode_initialized;
- int frame_size_in_words;
OopMap* map = nullptr;
OopMapSet *oop_maps = new OopMapSet();
@@ -2886,6 +2889,9 @@ void SharedRuntime::generate_deopt_blob() {
const Register exec_mode_reg = R21_tmp1;
const address start = __ pc();
+ int exception_offset = 0;
+ int exception_in_tls_offset = 0;
+ int reexecute_offset = 0;
#if defined(COMPILER1) || defined(COMPILER2)
// --------------------------------------------------------------------------
@@ -2909,7 +2915,8 @@ void SharedRuntime::generate_deopt_blob() {
map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ true,
- RegisterSaver::return_pc_is_lr);
+ RegisterSaver::return_pc_is_lr,
+ /*save_vectors*/ SuperwordUseVSX);
assert(map != nullptr, "OopMap must have been created");
__ li(exec_mode_reg, Deoptimization::Unpack_deopt);
@@ -2925,7 +2932,7 @@ void SharedRuntime::generate_deopt_blob() {
// - R3_ARG1: exception oop
// - R4_ARG2: exception pc
- int exception_offset = __ pc() - start;
+ exception_offset = __ pc() - start;
BLOCK_COMMENT("Prolog for exception case");
@@ -2936,21 +2943,20 @@ void SharedRuntime::generate_deopt_blob() {
__ std(R4_ARG2, _abi0(lr), R1_SP);
// Vanilla deoptimization with an exception pending in exception_oop.
- int exception_in_tls_offset = __ pc() - start;
+ exception_in_tls_offset = __ pc() - start;
// Push the "unpack frame".
// Save everything in sight.
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
- RegisterSaver::return_pc_is_pre_saved);
+ RegisterSaver::return_pc_is_pre_saved,
+ /*save_vectors*/ SuperwordUseVSX);
// Deopt during an exception. Save exec mode for unpack_frames.
__ li(exec_mode_reg, Deoptimization::Unpack_exception);
// fall through
-
- int reexecute_offset = 0;
#ifdef COMPILER1
__ b(exec_mode_initialized);
@@ -2960,7 +2966,8 @@ void SharedRuntime::generate_deopt_blob() {
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
- RegisterSaver::return_pc_is_pre_saved);
+ RegisterSaver::return_pc_is_pre_saved,
+ /*save_vectors*/ SuperwordUseVSX);
__ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif
@@ -2986,7 +2993,7 @@ void SharedRuntime::generate_deopt_blob() {
// Restore only the result registers that have been saved
// by save_volatile_registers(...).
- RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
+ RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes, /*save_vectors*/ SuperwordUseVSX);
// reload the exec mode from the UnrollBlock (it might have changed)
__ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
@@ -3068,11 +3075,12 @@ void SharedRuntime::generate_deopt_blob() {
// Return to the interpreter entry point.
__ blr();
- __ flush();
-#else // COMPILER2
+#else // !defined(COMPILER1) && !defined(COMPILER2)
__ unimplemented("deopt blob needed only with compiler");
- int exception_offset = __ pc() - start;
-#endif // COMPILER2
+#endif
+
+ // Make sure all code is generated
+ __ flush();
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
reexecute_offset, first_frame_size_in_bytes / wordSize);
diff --git a/src/hotspot/cpu/ppc/stubDeclarations_ppc.hpp b/src/hotspot/cpu/ppc/stubDeclarations_ppc.hpp
index be51afe42a4..41b8b71486d 100644
--- a/src/hotspot/cpu/ppc/stubDeclarations_ppc.hpp
+++ b/src/hotspot/cpu/ppc/stubDeclarations_ppc.hpp
@@ -29,35 +29,40 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 0) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 20000) \
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 2000) \
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 24000) \
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 24000) \
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index e48778a8b9f..f528587a8bb 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -5095,7 +5095,7 @@ void generate_lookup_secondary_supers_table_stub() {
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData *stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -5119,7 +5119,7 @@ void generate_lookup_secondary_supers_table_stub() {
}
};
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData *stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
diff --git a/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp b/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp
index 914c5a17a19..3b7ee66348a 100644
--- a/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp
@@ -183,3 +183,9 @@ address StubRoutines::ppc::generate_crc_constants(juint reverse_poly) {
return consts;
}
+
+#if INCLUDE_CDS
+// nothing to do for ppc
+void StubRoutines::init_AOTAddressTable() {
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index 8a3af748fa1..37f780535b4 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -3489,7 +3489,7 @@ void TemplateTable::invokevirtual(int byte_no) {
// Get receiver klass.
__ load_klass_check_null_throw(Rrecv_klass, Rrecv, R11_scratch1);
__ verify_klass_ptr(Rrecv_klass);
- __ profile_virtual_call(Rrecv_klass, R11_scratch1, R12_scratch2, false);
+ __ profile_virtual_call(Rrecv_klass, R11_scratch1, R12_scratch2);
generate_vtable_call(Rrecv_klass, Rvtableindex_or_method, Rret_addr, R11_scratch1);
}
@@ -3596,7 +3596,7 @@ void TemplateTable::invokeinterface_object_method(Register Rrecv_klass,
// Non-final callc case.
__ bind(LnotFinal);
__ lhz(Rindex, in_bytes(ResolvedMethodEntry::table_index_offset()), Rcache);
- __ profile_virtual_call(Rrecv_klass, Rtemp1, Rscratch, false);
+ __ profile_virtual_call(Rrecv_klass, Rtemp1, Rscratch);
generate_vtable_call(Rrecv_klass, Rindex, Rret, Rscratch);
}
@@ -3664,7 +3664,7 @@ void TemplateTable::invokeinterface(int byte_no) {
__ lookup_interface_method(Rrecv_klass, Rinterface_klass, noreg, noreg, Rscratch1, Rscratch2,
L_no_such_interface, /*return_method=*/false);
- __ profile_virtual_call(Rrecv_klass, Rscratch1, Rscratch2, false);
+ __ profile_virtual_call(Rrecv_klass, Rscratch1, Rscratch2);
// Find entry point to call.
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
index 75feb389298..3e3b1103c86 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "compiler/compilerDefinitions.inline.hpp"
#include "compiler/disassembler.hpp"
#include "jvm.h"
#include "memory/resourceArea.hpp"
@@ -105,7 +106,7 @@ void VM_Version::initialize() {
if (PowerArchitecturePPC64 >= 9) {
// Performance is good since Power9.
- if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
+ if (FLAG_IS_DEFAULT(SuperwordUseVSX) && CompilerConfig::is_c2_enabled()) {
FLAG_SET_ERGO(SuperwordUseVSX, true);
}
}
@@ -310,11 +311,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
-
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
@@ -475,19 +471,12 @@ void VM_Version::print_features() {
void VM_Version::determine_features() {
#if defined(ABI_ELFv2)
- // 1 InstWord per call for the blr instruction.
- const int code_size = (num_features+1+2*1)*BytesPerInstWord;
+ const int code_size = (num_features + 1 /*blr*/) * BytesPerInstWord;
#else
- // 7 InstWords for each call (function descriptor + blr instruction).
- const int code_size = (num_features+1+2*7)*BytesPerInstWord;
+ const int code_size = (num_features + 1 /*blr*/ + 6 /* fd */) * BytesPerInstWord;
#endif
int features = 0;
- // create test area
- enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
- char test_area[BUFFER_SIZE];
- char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
-
// Allocate space for the code.
ResourceMark rm;
CodeBuffer cb("detect_cpu_features", code_size, 0);
@@ -497,20 +486,13 @@ void VM_Version::determine_features() {
_features = VM_Version::all_features_m;
// Emit code.
- void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
+ void (*test)() = (void(*)())(void *)a->function_entry();
uint32_t *code = (uint32_t *)a->pc();
- // Keep R3_ARG1 unmodified, it contains &field (see below).
- // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
a->mfdscr(R0);
a->darn(R7);
a->brw(R5, R6);
a->blr();
- // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
- void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
- a->dcbz(R3_ARG1); // R3_ARG1 = addr
- a->blr();
-
uint32_t *code_end = (uint32_t *)a->pc();
a->flush();
_features = VM_Version::unknown_m;
@@ -522,18 +504,9 @@ void VM_Version::determine_features() {
Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
}
- // Measure cache line size.
- memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
- (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
- int count = 0; // count zeroed bytes
- for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
- guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
- _L1_data_cache_line_size = count;
-
// Execute code. Illegal instructions will be replaced by 0 in the signal handler.
VM_Version::_is_determine_features_test_running = true;
- // We must align the first argument to 16 bytes because of the lqarx check.
- (*test)(align_up((address)mid_of_test_area, 16), 0);
+ (*test)();
VM_Version::_is_determine_features_test_running = false;
// determine which instructions are legal.
@@ -550,6 +523,10 @@ void VM_Version::determine_features() {
}
_features = features;
+
+ _L1_data_cache_line_size = VM_Version::get_dcache_line_size();
+ assert(_L1_data_cache_line_size >= DEFAULT_CACHE_LINE_SIZE,
+ "processors with smaller cache line size are no longer supported");
}
// Power 8: Configure Data Stream Control Register.
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.hpp b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
index 11dce83bed0..0f4eb3593a3 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -81,6 +81,9 @@ public:
static uint64_t _dscr_val;
static void initialize_cpu_information(void);
+
+ static int get_dcache_line_size();
+ static int get_icache_line_size();
};
#endif // CPU_PPC_VM_VERSION_PPC_HPP
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
index 819d6c05654..8aced227a06 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -196,12 +196,9 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe
if (UseCompactObjectHeaders) {
__ load_narrow_klass_compact(tmp, src);
__ load_narrow_klass_compact(t0, dst);
- } else if (UseCompressedClassPointers) {
+ } else {
__ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
__ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
- } else {
- __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
- __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
}
__ bne(tmp, t0, *stub->entry(), /* is_far */ true);
} else {
@@ -243,37 +240,6 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe
}
}
-void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) {
- assert(default_type != nullptr, "null default_type!");
- BasicType basic_type = default_type->element_type()->basic_type();
- if (basic_type == T_ARRAY) { basic_type = T_OBJECT; }
- if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
- // Sanity check the known type with the incoming class. For the
- // primitive case the types must match exactly with src.klass and
- // dst.klass each exactly matching the default type. For the
- // object array case, if no type check is needed then either the
- // dst type is exactly the expected type and the src type is a
- // subtype which we can't check or src is the same array as dst
- // but not necessarily exactly of type default_type.
- Label known_ok, halt;
- __ mov_metadata(tmp, default_type->constant_encoding());
- if (UseCompressedClassPointers) {
- __ encode_klass_not_null(tmp);
- }
-
- if (basic_type != T_OBJECT) {
- __ cmp_klass_compressed(dst, tmp, t0, halt, false);
- __ cmp_klass_compressed(src, tmp, t0, known_ok, true);
- } else {
- __ cmp_klass_compressed(dst, tmp, t0, known_ok, true);
- __ beq(src, dst, known_ok);
- }
- __ bind(halt);
- __ stop("incorrect type information in arraycopy");
- __ bind(known_ok);
- }
-}
-
void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
ciArrayKlass *default_type = op->expected_type();
Register src = op->src()->as_register();
@@ -304,7 +270,28 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
}
#ifdef ASSERT
- arraycopy_assert(src, dst, tmp, default_type, flags);
+ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+ // Sanity check the known type with the incoming class. For the
+ // primitive case the types must match exactly with src.klass and
+ // dst.klass each exactly matching the default type. For the
+ // object array case, if no type check is needed then either the
+ // dst type is exactly the expected type and the src type is a
+ // subtype which we can't check or src is the same array as dst
+ // but not necessarily exactly of type default_type.
+ Label known_ok, halt;
+ __ mov_metadata(tmp, default_type->constant_encoding());
+
+ if (basic_type != T_OBJECT) {
+ __ cmp_klass_bne(dst, tmp, t0, t1, halt);
+ __ cmp_klass_beq(src, tmp, t0, t1, known_ok);
+ } else {
+ __ cmp_klass_beq(dst, tmp, t0, t1, known_ok);
+ __ beq(src, dst, known_ok);
+ }
+ __ bind(halt);
+ __ stop("incorrect type information in arraycopy");
+ __ bind(known_ok);
+ }
#endif
#ifndef PRODUCT
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
index 06a0f248ca6..b5452f3e4cd 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -39,7 +39,6 @@
void arraycopy_type_check(Register src, Register src_pos, Register length,
Register dst, Register dst_pos, Register tmp,
CodeStub *stub, BasicType basic_type, int flags);
- void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags);
void arraycopy_prepare_params(Register src, Register src_pos, Register length,
Register dst, Register dst_pos, BasicType basic_type);
void arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length,
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
index e77a2067e89..29e5d86d0cc 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -55,20 +55,6 @@ const Register SHIFT_count = x10; // where count for shift operations must be
#define __ _masm->
-static void select_different_registers(Register preserve,
- Register extra,
- Register &tmp1,
- Register &tmp2) {
- if (tmp1 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp1 = extra;
- } else if (tmp2 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp2 = extra;
- }
- assert_different_registers(preserve, tmp1, tmp2);
-}
-
static void select_different_registers(Register preserve,
Register extra,
Register &tmp1,
@@ -1041,31 +1027,10 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
__ bind(*op->stub()->continuation());
}
-void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
- Register recv, Label* update_done) {
- for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
- Label next_test;
- // See if the receiver is receiver[n].
- __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
- __ bne(recv, t1, next_test);
- Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
- __ increment(data_addr, DataLayout::counter_increment);
- __ j(*update_done);
- __ bind(next_test);
- }
-
- // Didn't find receiver; find next empty slot and fill it in
- for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
- Label next_test;
- Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
- __ ld(t1, recv_addr);
- __ bnez(t1, next_test);
- __ sd(recv, recv_addr);
- __ mv(t1, DataLayout::counter_increment);
- __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
- __ j(*update_done);
- __ bind(next_test);
- }
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md,
+ ciProfileData *data, Register recv) {
+ int mdp_offset = md->byte_offset_of_slot(data, in_ByteSize(0));
+ __ profile_receiver_type(recv, mdo, mdp_offset);
}
void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) {
@@ -1139,14 +1104,9 @@ void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Regist
__ j(*obj_is_null);
__ bind(not_null);
- Label update_done;
Register recv = k_RInfo;
__ load_klass(recv, obj);
- type_profile_helper(mdo, md, data, recv, &update_done);
- Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- __ increment(counter_addr, DataLayout::counter_increment);
-
- __ bind(update_done);
+ type_profile_helper(mdo, md, data, recv);
}
void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) {
@@ -1181,12 +1141,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
} else if (obj == klass_RInfo) {
klass_RInfo = dst;
}
- if (k->is_loaded() && !UseCompressedClassPointers) {
- select_different_registers(obj, dst, k_RInfo, klass_RInfo);
- } else {
- Rtmp1 = op->tmp3()->as_register();
- select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
- }
+ Rtmp1 = op->tmp3()->as_register();
+ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
assert_different_registers(obj, k_RInfo, klass_RInfo);
@@ -1554,11 +1510,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
// We know the type that will be seen at this call site; we can
// statically update the MethodData* rather than needing to do
// dynamic tests on the receiver type
- // NOTE: we should probably put a lock around this search to
- // avoid collisions by concurrent compilations
ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
- uint i;
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
+ for (uint i = 0; i < VirtualCallData::row_limit(); i++) {
ciKlass* receiver = vc_data->receiver(i);
if (known_klass->equals(receiver)) {
Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
@@ -1566,32 +1519,13 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
return;
}
}
-
- // Receiver type not found in profile data; select an empty slot
- // Note that this is less efficient than it should be because it
- // always does a write to the receiver part of the
- // VirtualCallData rather than just the first time
- for (i = 0; i < VirtualCallData::row_limit(); i++) {
- ciKlass* receiver = vc_data->receiver(i);
- if (receiver == nullptr) {
- Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
- __ mov_metadata(t1, known_klass->constant_encoding());
- __ sd(t1, recv_addr);
- Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
- __ increment(data_addr, DataLayout::counter_increment);
- return;
- }
- }
+ // Receiver type is not found in profile data.
+ // Fall back to runtime helper to handle the rest at runtime.
+ __ mov_metadata(recv, known_klass->constant_encoding());
} else {
__ load_klass(recv, recv);
- Label update_done;
- type_profile_helper(mdo, md, data, recv, &update_done);
- // Receiver did not match any saved receiver and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- __ increment(counter_addr, DataLayout::counter_increment);
-
- __ bind(update_done);
}
+ type_profile_helper(mdo, md, data, recv);
} else {
// Static call
__ increment(counter_addr, DataLayout::counter_increment);
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
index 1e466e90d37..90b6b3ee4f4 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -54,9 +54,8 @@ private:
Address stack_slot_address(int index, uint shift, int adjust = 0);
// Record the type of the receiver in ReceiverTypeData
- void type_profile_helper(Register mdo,
- ciMethodData *md, ciProfileData *data,
- Register recv, Label* update_done);
+ void type_profile_helper(Register mdo, ciMethodData *md,
+ ciProfileData *data, Register recv);
void casw(Register addr, Register newval, Register cmpval);
void caswu(Register addr, Register newval, Register cmpval);
diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
index 88565d9136f..f290708a231 100644
--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -1073,9 +1073,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ checkcast(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), info_for_exception, patching_info, stub,
@@ -1094,9 +1092,7 @@ void LIRGenerator::do_InstanceOf(InstanceOf* x) {
}
obj.load_item();
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ instanceof(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
index aeb077ba0a0..abcc070b253 100644
--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -92,12 +92,8 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
// This assumes that all prototype bits fitr in an int32_t
mv(tmp1, checked_cast(markWord::prototype().value()));
sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes()));
- if (UseCompressedClassPointers) { // Take care not to kill klass
- encode_klass_not_null(tmp1, klass, tmp2);
- sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));
- } else {
- sd(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
- }
+ encode_klass_not_null(tmp1, klass, tmp2);
+ sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));
}
if (len->is_valid()) {
@@ -108,7 +104,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
// Clear gap/first 4 bytes following the length field.
sw(zr, Address(obj, base_offset));
}
- } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
+ } else if (!UseCompactObjectHeaders) {
store_klass_gap(obj, zr);
}
}
diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
index b15bb5c23c3..b940393f063 100644
--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
@@ -42,7 +42,6 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1500 );
define_pd_global(intx, OnStackReplacePercentage, 933 );
-define_pd_global(intx, NewSizeThreadIncrease, 4*K );
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
@@ -52,7 +51,6 @@ define_pd_global(bool, ProfileInterpreter, false);
define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 824ea872935..0d06fd469de 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -30,7 +30,9 @@
#include "opto/intrinsicnode.hpp"
#include "opto/output.hpp"
#include "opto/subnode.hpp"
+#include "runtime/objectMonitorTable.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
#include "utilities/globalDefinitions.hpp"
#ifdef PRODUCT
@@ -123,35 +125,52 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box,
if (!UseObjectMonitorTable) {
assert(tmp1_monitor == tmp1_mark, "should be the same here");
} else {
+ const Register tmp2_hash = tmp2;
+ const Register tmp3_bucket = tmp3;
Label monitor_found;
- // Load cache address
- la(tmp3_t, Address(xthread, JavaThread::om_cache_oops_offset()));
+ // Save the mark, we might need it to extract the hash.
+ mv(tmp2_hash, tmp1_mark);
- const int num_unrolled = 2;
+ // Look for the monitor in the om_cache.
+
+ ByteSize cache_offset = JavaThread::om_cache_oops_offset();
+ ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
+ const int num_unrolled = OMCache::CAPACITY;
for (int i = 0; i < num_unrolled; i++) {
- ld(tmp1, Address(tmp3_t));
- beq(obj, tmp1, monitor_found);
- add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
+ ld(tmp1_monitor, Address(xthread, cache_offset + monitor_offset));
+ ld(tmp4, Address(xthread, cache_offset));
+ beq(obj, tmp4, monitor_found);
+ cache_offset = cache_offset + OMCache::oop_to_oop_difference();
}
- Label loop;
+ // Look for the monitor in the table.
- // Search for obj in cache.
- bind(loop);
+ // Get the hash code.
+ srli(tmp2_hash, tmp2_hash, markWord::hash_shift);
- // Check for match.
- ld(tmp1, Address(tmp3_t));
- beq(obj, tmp1, monitor_found);
+ // Get the table and calculate the bucket's address.
+ la(tmp3_t, ExternalAddress(ObjectMonitorTable::current_table_address()));
+ ld(tmp3_t, Address(tmp3_t));
+ ld(tmp1, Address(tmp3_t, ObjectMonitorTable::table_capacity_mask_offset()));
+ andr(tmp2_hash, tmp2_hash, tmp1);
+ ld(tmp3_t, Address(tmp3_t, ObjectMonitorTable::table_buckets_offset()));
- // Search until null encountered, guaranteed _null_sentinel at end.
- add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
- bnez(tmp1, loop);
- // Cache Miss. Take the slowpath.
- j(slow_path);
+ // Read the monitor from the bucket.
+ shadd(tmp3_bucket, tmp2_hash, tmp3_t, tmp4, LogBytesPerWord);
+ ld(tmp1_monitor, Address(tmp3_bucket));
+
+ // Check if the monitor in the bucket is special (empty, tombstone or removed).
+ mv(tmp2, ObjectMonitorTable::SpecialPointerValues::below_is_special);
+ bltu(tmp1_monitor, tmp2, slow_path);
+
+ // Check if object matches.
+ ld(tmp3, Address(tmp1_monitor, ObjectMonitor::object_offset()));
+ BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs_asm->try_resolve_weak_handle_in_c2(this, tmp3, tmp2, slow_path);
+ bne(tmp3, obj, slow_path);
bind(monitor_found);
- ld(tmp1_monitor, Address(tmp3_t, OMCache::oop_to_monitor_difference()));
}
const Register tmp2_owner_addr = tmp2;
@@ -1156,8 +1175,7 @@ void C2_MacroAssembler::string_compare_long_same_encoding(Register result, Regis
Label TAIL_CHECK, TAIL, NEXT_WORD, DIFFERENCE;
const int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
const int minCharsInWord = isLL ? wordSize : wordSize / 2;
@@ -1250,8 +1268,7 @@ void C2_MacroAssembler::string_compare_long_different_encoding(Register result,
Label TAIL, NEXT_WORD, DIFFERENCE;
const int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
Register strL = isLU ? str1 : str2;
Register strU = isLU ? str2 : str1;
@@ -1466,8 +1483,7 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
Register cnt1 = tmp3;
Register cnt2 = tmp1; // cnt2 only used in array length compare
@@ -1592,8 +1608,7 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
BLOCK_COMMENT("string_equals {");
@@ -2680,8 +2695,7 @@ void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register resul
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
BLOCK_COMMENT("arrays_equals_v {");
@@ -2813,10 +2827,14 @@ void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Regist
// Intrinsic for
//
-// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
-// return the number of characters copied.
-// - java/lang/StringUTF16.compress
-// return index of non-latin1 character if copy fails, otherwise 'len'.
+// - sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
+// Encodes char[] to byte[] in ISO-8859-1
+//
+// - java.lang.StringCoding#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
+// Encodes byte[] (containing UTF-16) to byte[] in ISO-8859-1
+//
+// - java.lang.StringCoding#encodeAsciiArray0(char[] sa, int sp, byte[] da, int dp, int len)
+// Encodes char[] to byte[] in ASCII
//
// This version always returns the number of characters copied. A successful
// copy will complete with the post-condition: 'res' == 'len', while an
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
index 648c24ee98b..73ef97939ed 100644
--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, InteriorEntryAlignment, 16);
-define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
// InitialCodeCacheSize derived from specjbb2000 run.
@@ -75,9 +74,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheMinBlockLength, 6);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed.
#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/downcallLinker_riscv.cpp b/src/hotspot/cpu/riscv/downcallLinker_riscv.cpp
index cc685645ec5..f9d7ce78ff0 100644
--- a/src/hotspot/cpu/riscv/downcallLinker_riscv.cpp
+++ b/src/hotspot/cpu/riscv/downcallLinker_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -140,10 +140,10 @@ void DowncallLinker::StubGenerator::generate() {
bool should_save_return_value = !_needs_return_buffer;
RegSpiller out_reg_spiller(_output_registers);
- int spill_offset = -1;
+ int out_spill_offset = -1;
if (should_save_return_value) {
- spill_offset = 0;
+ out_spill_offset = 0;
// spill area can be shared with shadow space and out args,
// since they are only used before the call,
// and spill area is only used after.
@@ -168,6 +168,9 @@ void DowncallLinker::StubGenerator::generate() {
// FP-> | |
// |---------------------| = frame_bottom_offset = frame_size
// | (optional) |
+ // | in_reg_spiller area |
+ // |---------------------|
+ // | (optional) |
// | capture state buf |
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
// | (optional) |
@@ -181,6 +184,18 @@ void DowncallLinker::StubGenerator::generate() {
GrowableArray out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
+ // Need to spill for state capturing runtime call.
+ // The area spilled into is distinct from the capture state buffer.
+ RegSpiller in_reg_spiller(out_regs);
+ int in_spill_offset = -1;
+ if (_captured_state_mask != 0) {
+ // The spill area cannot be shared with the out_spill since
+ // spilling needs to happen before the call. Allocate a new
+ // region in the stack for this spill space.
+ in_spill_offset = allocated_frame_size;
+ allocated_frame_size += in_reg_spiller.spill_size_bytes();
+ }
+
#ifndef PRODUCT
LogTarget(Trace, foreign, downcall) lt;
if (lt.is_enabled()) {
@@ -226,6 +241,20 @@ void DowncallLinker::StubGenerator::generate() {
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
__ block_comment("} argument shuffle");
+ if (_captured_state_mask != 0) {
+ assert(in_spill_offset != -1, "must be");
+ __ block_comment("{ load initial thread local");
+ in_reg_spiller.generate_spill(_masm, in_spill_offset);
+
+ // Copy the contents of the capture state buffer into thread local
+ __ ld(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
+ __ mv(c_rarg1, _captured_state_mask);
+ __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre));
+
+ in_reg_spiller.generate_fill(_masm, in_spill_offset);
+ __ block_comment("} load initial thread local");
+ }
+
__ jalr(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
// this call is assumed not to have killed xthread
@@ -254,15 +283,15 @@ void DowncallLinker::StubGenerator::generate() {
__ block_comment("{ save thread local");
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ ld(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
__ mv(c_rarg1, _captured_state_mask);
- __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state));
+ __ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ block_comment("} save thread local");
@@ -319,7 +348,7 @@ void DowncallLinker::StubGenerator::generate() {
if (should_save_return_value) {
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ mv(c_rarg0, xthread);
@@ -327,7 +356,7 @@ void DowncallLinker::StubGenerator::generate() {
__ rt_call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ j(L_after_safepoint_poll);
__ block_comment("} L_safepoint_poll_slow_path");
@@ -339,13 +368,13 @@ void DowncallLinker::StubGenerator::generate() {
if (should_save_return_value) {
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
}
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
}
__ j(L_after_reguard);
diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
index 51a203c548c..d1841a347e9 100644
--- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -236,8 +236,8 @@ inline bool frame::equal(frame other) const {
// Return unique id for this frame. The id must have a value where we can distinguish
// identity and younger/older relationship. null represents an invalid (incomparable)
-// frame.
-inline intptr_t* frame::id(void) const { return unextended_sp(); }
+// frame. Should not be called for heap frames.
+inline intptr_t* frame::id(void) const { return real_fp(); }
// Return true if the frame is older (less recent activation) than the frame represented by id
inline bool frame::is_older(intptr_t* id) const { assert(this->id() != nullptr && id != nullptr, "null frame id");
@@ -398,6 +398,9 @@ frame frame::sender(RegisterMap* map) const {
StackWatermarkSet::on_iteration(map->thread(), result);
}
+ // Calling frame::id() is currently not supported for heap frames.
+ assert(result._on_heap || this->_on_heap || result.is_older(this->id()), "Must be");
+
return result;
}
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
index f5916000890..aeb9df06de6 100644
--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -369,6 +369,11 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na
return opto_reg;
}
+void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Load the oop from the weak handle.
+ __ ld(obj, Address(obj));
+}
+
#undef __
#define __ _masm->
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
index e50fa1dae36..bbb2a5af824 100644
--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -110,6 +110,8 @@ public:
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj,
+ Register tmp, Label& slow_path);
#endif // COMPILER2
};
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
index d94bf428fd2..9eb546a1888 100644
--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -56,8 +56,10 @@ void CardTableBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet d
}
}
-void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) {
- assert_different_registers(obj, tmp);
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2) {
+ precond(tmp1 != noreg);
+ precond(tmp2 != noreg);
+ assert_different_registers(obj, tmp1, tmp2);
BarrierSet* bs = BarrierSet::barrier_set();
assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
@@ -65,17 +67,17 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob
assert(CardTable::dirty_card_val() == 0, "must be");
- __ load_byte_map_base(tmp);
- __ add(tmp, obj, tmp);
+ __ load_byte_map_base(tmp1);
+ __ add(tmp1, obj, tmp1);
if (UseCondCardMark) {
Label L_already_dirty;
- __ lbu(t1, Address(tmp));
- __ beqz(t1, L_already_dirty);
- __ sb(zr, Address(tmp));
+ __ lbu(tmp2, Address(tmp1));
+ __ beqz(tmp2, L_already_dirty);
+ __ sb(zr, Address(tmp1));
__ bind(L_already_dirty);
} else {
- __ sb(zr, Address(tmp));
+ __ sb(zr, Address(tmp1));
}
}
@@ -119,10 +121,10 @@ void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorS
if (needs_post_barrier) {
// flatten object address if needed
if (!precise || dst.offset() == 0) {
- store_check(masm, dst.base(), tmp3);
+ store_check(masm, dst.base(), tmp1, tmp2);
} else {
__ la(tmp3, dst);
- store_check(masm, tmp3, t0);
+ store_check(masm, tmp3, tmp1, tmp2);
}
}
}
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
index 6f6e9065103..1576f0a6dd8 100644
--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -31,7 +31,7 @@
class CardTableBarrierSetAssembler: public BarrierSetAssembler {
protected:
- void store_check(MacroAssembler* masm, Register obj, Register tmp);
+ void store_check(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2);
virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, RegSet saved_regs) {}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index dd6c8556307..8d530d15ee5 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -88,26 +89,16 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec
}
}
-void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call) {
- if (ShenandoahSATBBarrier) {
- satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, t0, tosca_live, expand_call);
- }
-}
+void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ bool tosca_live,
+ bool expand_call) {
+ assert(ShenandoahSATBBarrier, "Should be checked by caller");
-void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp1,
- Register tmp2,
- bool tosca_live,
- bool expand_call) {
// If expand_call is true then we expand the call_VM_leaf macro
// directly to skip generating the check by
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
@@ -376,21 +367,21 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
__ enter();
__ push_call_clobbered_registers();
- satb_write_barrier_pre(masm /* masm */,
- noreg /* obj */,
- dst /* pre_val */,
- xthread /* thread */,
- tmp1 /* tmp1 */,
- tmp2 /* tmp2 */,
- true /* tosca_live */,
- true /* expand_call */);
+ satb_barrier(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp1 */,
+ tmp2 /* tmp2 */,
+ true /* tosca_live */,
+ true /* expand_call */);
__ pop_call_clobbered_registers();
__ leave();
}
}
-void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj) {
- assert(ShenandoahCardBarrier, "Did you mean to enable ShenandoahCardBarrier?");
+void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
+ assert(ShenandoahCardBarrier, "Should have been checked by caller");
__ srli(obj, obj, CardTable::card_shift());
@@ -413,13 +404,13 @@ void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register o
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
- bool on_oop = is_reference_type(type);
- if (!on_oop) {
+ // 1: non-reference types require no barriers
+ if (!is_reference_type(type)) {
BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
return;
}
- // flatten object address if needed
+ // Flatten object address right away for simplicity: likely needed by barriers
if (dst.offset() == 0) {
if (dst.base() != tmp3) {
__ mv(tmp3, dst.base());
@@ -428,20 +419,26 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet
__ la(tmp3, dst);
}
- shenandoah_write_barrier_pre(masm,
- tmp3 /* obj */,
- tmp2 /* pre_val */,
- xthread /* thread */,
- tmp1 /* tmp */,
- val != noreg /* tosca_live */,
- false /* expand_call */);
+ bool storing_non_null = (val != noreg);
+ // 2: pre-barrier: SATB needs the previous value
+ if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
+ satb_barrier(masm,
+ tmp3 /* obj */,
+ tmp2 /* pre_val */,
+ xthread /* thread */,
+ tmp1 /* tmp */,
+ t0 /* tmp2 */,
+ storing_non_null /* tosca_live */,
+ false /* expand_call */);
+ }
+
+ // Store!
BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
- bool in_heap = (decorators & IN_HEAP) != 0;
- bool needs_post_barrier = (val != noreg) && in_heap && ShenandoahCardBarrier;
- if (needs_post_barrier) {
- store_check(masm, tmp3);
+ // 3: post-barrier: card barrier needs store address
+ if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
+ card_barrier(masm, tmp3);
}
}
@@ -465,6 +462,30 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
__ bind(done);
}
+#ifdef COMPILER2
+void ShenandoahBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler *masm, Register obj,
+ Register tmp, Label& slow_path) {
+ assert_different_registers(obj, tmp);
+
+ Label done;
+
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
+
+ // Check if the reference is null, and if it is, take the fast path.
+ __ beqz(obj, done);
+
+ Address gc_state(xthread, ShenandoahThreadLocalData::gc_state_offset());
+ __ lbu(tmp, gc_state);
+
+ // Check if the heap is under weak-reference/roots processing, in
+ // which case we need to take the slow path.
+ __ test_bit(tmp, tmp, ShenandoahHeap::WEAK_ROOTS_BITPOS);
+ __ bnez(tmp, slow_path);
+ __ bind(done);
+}
+#endif
+
// Special Shenandoah CAS implementation that handles false negatives due
// to concurrent evacuation. The service is more complex than a
// traditional CAS operation because the CAS operation is intended to
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
index c8a7c35fb83..e35e09c93da 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -41,23 +42,16 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- void satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp1,
- Register tmp2,
- bool tosca_live,
- bool expand_call);
- void shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register thread,
- Register tmp,
- bool tosca_live,
- bool expand_call);
+ void satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register thread,
+ Register tmp1,
+ Register tmp2,
+ bool tosca_live,
+ bool expand_call);
- void store_check(MacroAssembler* masm, Register obj);
+ void card_barrier(MacroAssembler* masm, Register obj);
void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
@@ -91,7 +85,9 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
-
+#ifdef COMPILER2
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
+#endif
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result);
};
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
index 09dea62b6d1..163271a2f11 100644
--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -602,6 +602,27 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm,
BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native");
}
+#ifdef COMPILER2
+void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_weak_handle_in_c2 {");
+
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
+
+ // Check if the oop is bad, in which case we need to take the slow path.
+ __ relocate(barrier_Relocation::spec(), [&] {
+ __ li16u(tmp, barrier_Relocation::unpatched);
+ }, ZBarrierRelocationFormatMarkBadMask);
+ __ andr(tmp, obj, tmp);
+ __ bnez(tmp, slow_path);
+
+ // Oop is okay, so we uncolor it.
+ __ srli(obj, obj, ZPointerLoadShift);
+
+ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_weak_handle_in_c2");
+}
+#endif
+
static uint16_t patch_barrier_relocation_value(int format) {
switch (format) {
case ZBarrierRelocationFormatLoadBadMask:
diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp
index 190d81acd0c..648cb3bf63d 100644
--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -170,6 +170,10 @@ public:
ZLoadBarrierStubC2* stub) const;
void generate_c2_store_barrier_stub(MacroAssembler* masm,
ZStoreBarrierStubC2* stub) const;
+ void try_resolve_weak_handle_in_c2(MacroAssembler* masm,
+ Register obj,
+ Register tmp,
+ Label& slow_path);
#endif // COMPILER2
void check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error);
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
index 390ed2daeb9..21b119266e2 100644
--- a/src/hotspot/cpu/riscv/globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -39,7 +39,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap nulls
define_pd_global(bool, DelayCompilerStubsGeneration, COMPILER2_OR_JVMCI);
define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
-define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(uint, CodeEntryAlignment, 64);
define_pd_global(intx, OptoLoopAlignment, 16);
#define DEFAULT_STACK_YELLOW_PAGES (2)
diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp
index 258bc665770..20de2dbb2ad 100644
--- a/src/hotspot/cpu/riscv/icache_riscv.cpp
+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp
@@ -39,7 +39,8 @@ static int icache_flush(address addr, int lines, int magic) {
// We need to make sure stores happens before the I/D cache synchronization.
__asm__ volatile("fence rw, rw" : : : "memory");
- RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size);
+ uintptr_t end = (uintptr_t)addr + ((uintptr_t)lines << ICache::log2_line_size);
+ RiscvFlushIcache::flush((uintptr_t)addr, end);
return magic;
}
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 189c7c93d07..804c2072ba5 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -237,15 +237,14 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset(
// Rsub_klass: subklass
//
// Kills:
-// x12, x15
+// x12
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
Label& ok_is_subtype) {
assert(Rsub_klass != x10, "x10 holds superklass");
assert(Rsub_klass != x12, "x12 holds 2ndary super array length");
- assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr");
// Profile the not-null value's klass.
- profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15
+ profile_typecheck(x12, Rsub_klass); // blows x12
// Do the check.
check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12
@@ -1041,27 +1040,15 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
- Register mdp,
- Register reg2,
- bool receiver_can_be_null) {
+ Register mdp) {
if (ProfileInterpreter) {
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
- Label skip_receiver_profile;
- if (receiver_can_be_null) {
- Label not_null;
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- j(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
- record_klass_in_profile(receiver, mdp, reg2);
- bind(skip_receiver_profile);
+ profile_receiver_type(receiver, mdp, 0);
// The method data pointer needs to be updated to reflect the new target.
@@ -1072,153 +1059,6 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
}
}
-// This routine creates a state machine for updating the multi-row
-// type profile at a virtual call site (or other type-sensitive bytecode).
-// The machine visits each row (of receiver/count) until the receiver type
-// is found, or until it runs out of rows. At the same time, it remembers
-// the location of the first empty row. (An empty row records null for its
-// receiver, and can be allocated for a newly-observed receiver type.)
-// Because there are two degrees of freedom in the state, a simple linear
-// search will not work; it must be a decision tree. Hence this helper
-// function is recursive, to generate the required tree structured code.
-// It's the interpreter, so we are trading off code space for speed.
-// See below for example code.
-void InterpreterMacroAssembler::record_klass_in_profile_helper(
- Register receiver, Register mdp,
- Register reg2, Label& done) {
- if (TypeProfileWidth == 0) {
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- } else {
- record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
- &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset);
- }
-}
-
-void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
- Register reg2, int start_row, Label& done, int total_rows,
- OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn) {
- int last_row = total_rows - 1;
- assert(start_row <= last_row, "must be work left to do");
- // Test this row for both the item and for null.
- // Take any of three different outcomes:
- // 1. found item => increment count and goto done
- // 2. found null => keep looking for case 1, maybe allocate this cell
- // 3. found something else => keep looking for cases 1 and 2
- // Case 3 is handled by a recursive call.
- for (int row = start_row; row <= last_row; row++) {
- Label next_test;
- bool test_for_null_also = (row == start_row);
-
- // See if the item is item[n].
- int item_offset = in_bytes(item_offset_fn(row));
- test_mdp_data_at(mdp, item_offset, item,
- (test_for_null_also ? reg2 : noreg),
- next_test);
- // (Reg2 now contains the item from the CallData.)
-
- // The item is item[n]. Increment count[n].
- int count_offset = in_bytes(item_count_offset_fn(row));
- increment_mdp_data_at(mdp, count_offset);
- j(done);
- bind(next_test);
-
- if (test_for_null_also) {
- Label found_null;
- // Failed the equality check on item[n]... Test for null.
- if (start_row == last_row) {
- // The only thing left to do is handle the null case.
- beqz(reg2, found_null);
- // Item did not match any saved item and there is no empty row for it.
- // Increment total counter to indicate polymorphic case.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- j(done);
- bind(found_null);
- break;
- }
- // Since null is rare, make it be the branch-taken case.
- beqz(reg2, found_null);
-
- // Put all the "Case 3" tests here.
- record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
- item_offset_fn, item_count_offset_fn);
-
- // Found a null. Keep searching for a matching item,
- // but remember that this is an empty (unused) slot.
- bind(found_null);
- }
- }
-
- // In the fall-through case, we found no matching item, but we
- // observed the item[start_row] is null.
- // Fill in the item field and increment the count.
- int item_offset = in_bytes(item_offset_fn(start_row));
- set_mdp_data_at(mdp, item_offset, item);
- int count_offset = in_bytes(item_count_offset_fn(start_row));
- mv(reg2, DataLayout::counter_increment);
- set_mdp_data_at(mdp, count_offset, reg2);
- if (start_row > 0) {
- j(done);
- }
-}
-
-// Example state machine code for three profile rows:
-// # main copy of decision tree, rooted at row[1]
-// if (row[0].rec == rec) then [
-// row[0].incr()
-// goto done
-// ]
-// if (row[0].rec != nullptr) then [
-// # inner copy of decision tree, rooted at row[1]
-// if (row[1].rec == rec) then [
-// row[1].incr()
-// goto done
-// ]
-// if (row[1].rec != nullptr) then [
-// # degenerate decision tree, rooted at row[2]
-// if (row[2].rec == rec) then [
-// row[2].incr()
-// goto done
-// ]
-// if (row[2].rec != nullptr) then [
-// count.incr()
-// goto done
-// ] # overflow
-// row[2].init(rec)
-// goto done
-// ] else [
-// # remember row[1] is empty
-// if (row[2].rec == rec) then [
-// row[2].incr()
-// goto done
-// ]
-// row[1].init(rec)
-// goto done
-// ]
-// else [
-// # remember row[0] is empty
-// if (row[1].rec == rec) then [
-// row[1].incr()
-// goto done
-// ]
-// if (row[2].rec == rec) then [
-// row[2].incr()
-// goto done
-// ]
-// row[0].init(rec)
-// goto done
-// ]
-// done:
-
-void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
- Register mdp, Register reg2) {
- assert(ProfileInterpreter, "must be profiling");
- Label done;
-
- record_klass_in_profile_helper(receiver, mdp, reg2, done);
-
- bind(done);
-}
-
void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) {
if (ProfileInterpreter) {
Label profile_continue;
@@ -1274,7 +1114,7 @@ void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
}
}
-void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass) {
if (ProfileInterpreter) {
Label profile_continue;
@@ -1287,7 +1127,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass,
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
// Record the object type.
- record_klass_in_profile(klass, mdp, reg2);
+ profile_receiver_type(klass, mdp, 0);
}
update_mdp_by_constant(mdp, mdp_delta);
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
index a9df09d656a..df86f0dc532 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -262,14 +262,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
Register test_value_out,
Label& not_equal_continue);
- void record_klass_in_profile(Register receiver, Register mdp,
- Register reg2);
- void record_klass_in_profile_helper(Register receiver, Register mdp,
- Register reg2, Label& done);
- void record_item_in_profile_helper(Register item, Register mdp,
- Register reg2, int start_row, Label& done, int total_rows,
- OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn);
-
void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
void update_mdp_by_constant(Register mdp_in, int constant);
@@ -282,12 +274,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_not_taken_branch(Register mdp);
void profile_call(Register mdp);
void profile_final_call(Register mdp);
- void profile_virtual_call(Register receiver, Register mdp,
- Register t1,
- bool receiver_can_be_null = false);
+ void profile_virtual_call(Register receiver, Register mdp);
void profile_ret(Register return_bci, Register mdp);
void profile_null_seen(Register mdp);
- void profile_typecheck(Register mdp, Register klass, Register temp);
+ void profile_typecheck(Register mdp, Register klass);
void profile_typecheck_failed(Register mdp);
void profile_switch_default(Register mdp);
void profile_switch_case(Register index_in_scratch, Register mdp,
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 43b17a13c20..0e32c602d95 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -49,6 +49,7 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/integerCast.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/compile.hpp"
@@ -543,6 +544,160 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file,
BLOCK_COMMENT("} verify_oop");
}
+// Handle the receiver type profile update given the "recv" klass.
+//
+// Normally updates the ReceiverTypeData (RD) that starts at "mdp" + "mdp_offset".
+// If there are no matching or claimable receiver entries in RD, updates
+// the polymorphic counter.
+//
+// This code is expected to run in either the interpreter or JIT-ed code, without
+// extra synchronization. For safety, receiver cells are claimed atomically, which
+// avoids grossly misrepresenting the profiles under concurrent updates. For speed,
+// counter updates are not atomic.
+//
+void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_offset) {
+ assert_different_registers(recv, mdp, t0, t1);
+
+ int base_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(0));
+ int end_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(ReceiverTypeData::row_limit()));
+ int poly_count_offset = in_bytes(CounterData::count_offset());
+ int receiver_step = in_bytes(ReceiverTypeData::receiver_offset(1)) - base_receiver_offset;
+ int receiver_to_count_step = in_bytes(ReceiverTypeData::receiver_count_offset(0)) - base_receiver_offset;
+
+ // Adjust for MDP offsets. Slots are pointer-sized, so is the global offset.
+ base_receiver_offset += mdp_offset;
+ end_receiver_offset += mdp_offset;
+ poly_count_offset += mdp_offset;
+
+#ifdef ASSERT
+ // We are about to walk the MDO slots without asking for offsets.
+ // Check that our math hits all the right spots.
+ for (uint c = 0; c < ReceiverTypeData::row_limit(); c++) {
+ int real_recv_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_offset(c));
+ int real_count_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_count_offset(c));
+ int offset = base_receiver_offset + receiver_step*c;
+ int count_offset = offset + receiver_to_count_step;
+ assert(offset == real_recv_offset, "receiver slot math");
+ assert(count_offset == real_count_offset, "receiver count math");
+ }
+ int real_poly_count_offset = mdp_offset + in_bytes(CounterData::count_offset());
+ assert(poly_count_offset == real_poly_count_offset, "poly counter math");
+#endif
+
+ // Corner case: no profile table. Increment poly counter and exit.
+ if (ReceiverTypeData::row_limit() == 0) {
+ increment(Address(mdp, poly_count_offset), DataLayout::counter_increment);
+ return;
+ }
+
+ Register offset = t1;
+
+ Label L_loop_search_receiver, L_loop_search_empty;
+ Label L_restart, L_found_recv, L_found_empty, L_polymorphic, L_count_update;
+
+ // The code here recognizes three major cases:
+ // A. Fastest: receiver found in the table
+ // B. Fast: no receiver in the table, and the table is full
+ // C. Slow: no receiver in the table, free slots in the table
+ //
+ // The case A performance is most important, as perfectly-behaved code would end up
+ // there, especially with larger TypeProfileWidth. The case B performance is
+ // important as well, this is where bulk of code would land for normally megamorphic
+ // cases. The case C performance is not essential, its job is to deal with installation
+ // races, we optimize for code density instead. Case C needs to make sure that receiver
+ // rows are only claimed once. This makes sure we never overwrite a row for another
+ // receiver and never duplicate the receivers in the list, making profile type-accurate.
+ //
+ // It is very tempting to handle these cases in a single loop, and claim the first slot
+ // without checking the rest of the table. But, profiling code should tolerate free slots
+ // in the table, as class unloading can clear them. After such cleanup, the receiver
+// we need might be _after_ the free slot. Therefore, we need to let at least a full
+// scan complete before trying to install new slots. Splitting the code into several tight
+ // loops also helpfully optimizes for cases A and B.
+ //
+ // This code is effectively:
+ //
+ // restart:
+ // // Fastest: receiver is already installed
+// for (i = 0; i < row_limit(); i++) {
+ // if (receiver(i) == recv) goto found_recv(i);
+ // }
+ //
+ // // Fast: no receiver, but profile is full
+// for (i = 0; i < row_limit(); i++) {
+// if (receiver(i) == null) goto found_empty(i);
+ // }
+ // goto polymorphic
+ //
+ // // Slow: try to install receiver
+// found_empty(i):
+ // CAS(&receiver(i), null, recv);
+ // goto restart
+ //
+ // polymorphic:
+ // count++;
+ // return
+ //
+ // found_recv(i):
+ // *receiver_count(i)++
+ //
+
+ bind(L_restart);
+
+ // Fastest: receiver is already installed
+ mv(offset, base_receiver_offset);
+ bind(L_loop_search_receiver);
+ add(t0, mdp, offset);
+ ld(t0, Address(t0));
+ beq(recv, t0, L_found_recv);
+ add(offset, offset, receiver_step);
+ sub(t0, offset, end_receiver_offset);
+ bnez(t0, L_loop_search_receiver);
+
+ // Fast: no receiver, but profile is full
+ mv(offset, base_receiver_offset);
+ bind(L_loop_search_empty);
+ add(t0, mdp, offset);
+ ld(t0, Address(t0));
+ beqz(t0, L_found_empty);
+ add(offset, offset, receiver_step);
+ sub(t0, offset, end_receiver_offset);
+ bnez(t0, L_loop_search_empty);
+ j(L_polymorphic);
+
+ // Slow: try to install receiver
+ bind(L_found_empty);
+
+ // Atomically swing receiver slot: null -> recv.
+ //
+ // The update uses CAS, which clobbers t0. Therefore, t1
+ // is used to hold the destination address. This is safe because the
+ // offset is no longer needed after the address is computed.
+ add(t1, mdp, offset);
+ weak_cmpxchg(/*addr*/ t1, /*expected*/ zr, /*new*/ recv, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::relaxed, /*result*/ t0);
+
+ // CAS success means the slot now has the receiver we want. CAS failure means
+ // something had claimed the slot concurrently: it can be the same receiver we want,
+ // or something else. Since this is a slow path, we can optimize for code density,
+ // and just restart the search from the beginning.
+ j(L_restart);
+
+ // Counter updates:
+ // Increment polymorphic counter instead of receiver slot.
+ bind(L_polymorphic);
+ mv(offset, poly_count_offset);
+ j(L_count_update);
+
+ // Found a receiver, convert its slot offset to corresponding count offset.
+ bind(L_found_recv);
+ add(offset, offset, receiver_to_count_step);
+
+ bind(L_count_update);
+ add(t1, mdp, offset);
+ increment(Address(t1), DataLayout::counter_increment);
+}
+
void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
if (!VerifyOops) {
return;
@@ -1793,14 +1948,12 @@ void MacroAssembler::restore_cpu_control_state_after_jni(Register tmp) {
}
}
-void MacroAssembler::push_reg(Register Rs)
-{
+void MacroAssembler::push_reg(Register Rs) {
subi(esp, esp, wordSize);
sd(Rs, Address(esp, 0));
}
-void MacroAssembler::pop_reg(Register Rd)
-{
+void MacroAssembler::pop_reg(Register Rd) {
ld(Rd, Address(esp, 0));
addi(esp, esp, wordSize);
}
@@ -1819,7 +1972,11 @@ int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
// Push integer registers in the bitset supplied. Don't push sp.
// Return the number of words pushed
-int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
+int MacroAssembler::push_reg(RegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
DEBUG_ONLY(int words_pushed = 0;)
unsigned char regs[32];
int count = bitset_to_regs(bitset, regs);
@@ -1839,7 +1996,11 @@ int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
return count;
}
-int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
+int MacroAssembler::pop_reg(RegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
DEBUG_ONLY(int words_popped = 0;)
unsigned char regs[32];
int count = bitset_to_regs(bitset, regs);
@@ -1861,7 +2022,11 @@ int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
// Push floating-point registers in the bitset supplied.
// Return the number of words pushed
-int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+int MacroAssembler::push_fp(FloatRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
DEBUG_ONLY(int words_pushed = 0;)
unsigned char regs[32];
int count = bitset_to_regs(bitset, regs);
@@ -1881,7 +2046,11 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return count;
}
-int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+int MacroAssembler::pop_fp(FloatRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
DEBUG_ONLY(int words_popped = 0;)
unsigned char regs[32];
int count = bitset_to_regs(bitset, regs);
@@ -2567,7 +2736,11 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
#ifdef COMPILER2
// Push vector registers in the bitset supplied.
// Return the number of words pushed
-int MacroAssembler::push_v(unsigned int bitset, Register stack) {
+int MacroAssembler::push_v(VectorRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
// Scan bitset to accumulate register pairs
@@ -2582,7 +2755,11 @@ int MacroAssembler::push_v(unsigned int bitset, Register stack) {
return count * vector_size_in_bytes / wordSize;
}
-int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
+int MacroAssembler::pop_v(VectorRegSet regset, Register stack) {
+ if (regset.bits() == 0) {
+ return 0;
+ }
+ auto bitset = integer_cast(regset.bits());
int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
// Scan bitset to accumulate register pairs
@@ -3357,19 +3534,30 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R
sd(tmp1, adr);
}
-void MacroAssembler::cmp_klass_compressed(Register oop, Register trial_klass, Register tmp, Label &L, bool equal) {
+void MacroAssembler::cmp_klass_beq(Register obj, Register klass,
+ Register tmp1, Register tmp2,
+ Label &L, bool is_far) {
+ assert_different_registers(obj, klass, tmp1, tmp2);
if (UseCompactObjectHeaders) {
- load_narrow_klass_compact(tmp, oop);
- } else if (UseCompressedClassPointers) {
- lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+ load_narrow_klass_compact(tmp1, obj);
} else {
- ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+ lwu(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));
}
- if (equal) {
- beq(trial_klass, tmp, L);
+ decode_klass_not_null(tmp1, tmp2);
+ beq(klass, tmp1, L, is_far);
+}
+
+void MacroAssembler::cmp_klass_bne(Register obj, Register klass,
+ Register tmp1, Register tmp2,
+ Label &L, bool is_far) {
+ assert_different_registers(obj, klass, tmp1, tmp2);
+ if (UseCompactObjectHeaders) {
+ load_narrow_klass_compact(tmp1, obj);
} else {
- bne(trial_klass, tmp, L);
+ lwu(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));
}
+ decode_klass_not_null(tmp1, tmp2);
+ bne(klass, tmp1, L, is_far);
}
// Move an oop into a register.
@@ -3587,11 +3775,9 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(dst, src);
decode_klass_not_null(dst, tmp);
- } else if (UseCompressedClassPointers) {
+ } else {
lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
decode_klass_not_null(dst, tmp);
- } else {
- ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
}
@@ -3599,20 +3785,15 @@ void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
// FIXME: Should this be a store release? concurrent gcs assumes
// klass length is valid if klass field is not null.
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- encode_klass_not_null(src, tmp);
- sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
- } else {
- sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
- }
+ encode_klass_not_null(src, tmp);
+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+
}
void MacroAssembler::store_klass_gap(Register dst, Register src) {
assert(!UseCompactObjectHeaders, "not with compact headers");
- if (UseCompressedClassPointers) {
- // Store to klass gap in destination
- sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
- }
+ // Store to klass gap in destination
+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
}
void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
@@ -3621,7 +3802,6 @@ void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
}
void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
- assert(UseCompressedClassPointers, "should only be used for compressed headers");
assert_different_registers(dst, tmp);
assert_different_registers(src, tmp);
@@ -3652,8 +3832,6 @@ void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
}
void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
- assert(UseCompressedClassPointers, "should only be used for compressed headers");
-
if (CompressedKlassPointers::base() == nullptr) {
if (CompressedKlassPointers::shift() != 0) {
srli(dst, src, CompressedKlassPointers::shift());
@@ -5110,9 +5288,8 @@ void MacroAssembler::get_thread(Register thread) {
}
void MacroAssembler::load_byte_map_base(Register reg) {
- CardTable::CardValue* byte_map_base =
- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
- mv(reg, (uint64_t)byte_map_base);
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
+ mv(reg, (uint64_t)ctbs->card_table_base_const());
}
void MacroAssembler::build_frame(int framesize) {
@@ -5184,7 +5361,6 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
}
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
@@ -5264,12 +5440,9 @@ int MacroAssembler::ic_check(int end_alignment) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(tmp1, receiver);
lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
- } else if (UseCompressedClassPointers) {
+ } else {
lwu(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
lwu(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
- } else {
- ld(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
- ld(tmp2, Address(data, CompiledICData::speculated_klass_offset()));
}
Label ic_hit;
@@ -5390,13 +5563,6 @@ void MacroAssembler::decrementw(const Address dst, int32_t value, Register tmp1,
sw(tmp1, adr);
}
-void MacroAssembler::cmpptr(Register src1, const Address &src2, Label& equal, Register tmp) {
- assert_different_registers(src1, tmp);
- assert(src2.getMode() == Address::literal, "must be applied to a literal address");
- ld(tmp, src2);
- beq(src1, tmp, equal);
-}
-
void MacroAssembler::load_method_holder_cld(Register result, Register method) {
load_method_holder(result, method);
ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 3b021388fa5..4cc55e7ae23 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -198,7 +198,12 @@ class MacroAssembler: public Assembler {
void load_klass(Register dst, Register src, Register tmp = t0);
void load_narrow_klass_compact(Register dst, Register src);
void store_klass(Register dst, Register src, Register tmp = t0);
- void cmp_klass_compressed(Register oop, Register trial_klass, Register tmp, Label &L, bool equal);
+ void cmp_klass_beq(Register obj, Register klass,
+ Register tmp1, Register tmp2,
+ Label &L, bool is_far = false);
+ void cmp_klass_bne(Register obj, Register klass,
+ Register tmp1, Register tmp2,
+ Label &L, bool is_far = false);
void encode_klass_not_null(Register r, Register tmp = t0);
void decode_klass_not_null(Register r, Register tmp = t0);
@@ -390,6 +395,8 @@ class MacroAssembler: public Assembler {
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+ void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
+
// only if +VerifyOops
void _verify_oop(Register reg, const char* s, const char* file, int line);
void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
@@ -811,15 +818,6 @@ class MacroAssembler: public Assembler {
void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
private:
- int push_reg(unsigned int bitset, Register stack);
- int pop_reg(unsigned int bitset, Register stack);
- int push_fp(unsigned int bitset, Register stack);
- int pop_fp(unsigned int bitset, Register stack);
-#ifdef COMPILER2
- int push_v(unsigned int bitset, Register stack);
- int pop_v(unsigned int bitset, Register stack);
-#endif // COMPILER2
-
// The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G.
// The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G.
bool is_valid_32bit_offset(int64_t x) {
@@ -837,15 +835,19 @@ private:
}
public:
+ // Stack push and pop individual 64 bit registers
void push_reg(Register Rs);
void pop_reg(Register Rd);
- void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
- void pop_reg(RegSet regs, Register stack) { if (regs.bits()) pop_reg(regs.bits(), stack); }
- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
+
+ int push_reg(RegSet regset, Register stack);
+ int pop_reg(RegSet regset, Register stack);
+
+ int push_fp(FloatRegSet regset, Register stack);
+ int pop_fp(FloatRegSet regset, Register stack);
+
#ifdef COMPILER2
- void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
- void pop_v(VectorRegSet regs, Register stack) { if (regs.bits()) pop_v(regs.bits(), stack); }
+ int push_v(VectorRegSet regset, Register stack);
+ int pop_v(VectorRegSet regset, Register stack);
#endif // COMPILER2
// Push and pop everything that might be clobbered by a native
@@ -1346,9 +1348,8 @@ public:
void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
- void cmpptr(Register src1, const Address &src2, Label& equal, Register tmp = t0);
-
void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
+
void load_method_holder_cld(Register result, Register method);
void load_method_holder(Register holder, Register method);
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
index d770999df96..e80dedf58ed 100644
--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
@@ -72,17 +72,22 @@ void MethodHandles::verify_klass(MacroAssembler* _masm,
InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id);
Klass* klass = vmClasses::klass_at(klass_id);
Register temp1 = t1;
- Register temp2 = t0; // used by MacroAssembler::cmpptr
+ Register temp2 = t0;
Label L_ok, L_bad;
BLOCK_COMMENT("verify_klass {");
__ verify_oop(obj);
__ beqz(obj, L_bad);
+
__ push_reg(RegSet::of(temp1, temp2), sp);
__ load_klass(temp1, obj, temp2);
- __ cmpptr(temp1, ExternalAddress((address) klass_addr), L_ok);
+ __ ld(temp2, ExternalAddress((address)klass_addr));
+ __ beq(temp1, temp2, L_ok);
+
intptr_t super_check_offset = klass->super_check_offset();
__ ld(temp1, Address(temp1, super_check_offset));
- __ cmpptr(temp1, ExternalAddress((address) klass_addr), L_ok);
+ __ ld(temp2, ExternalAddress((address)klass_addr));
+ __ beq(temp1, temp2, L_ok);
+
__ pop_reg(RegSet::of(temp1, temp2), sp);
__ bind(L_bad);
__ stop(error_message);
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 730dd68dd88..e236d03e6d2 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
// Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -1801,13 +1801,8 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
assert_cond(st != nullptr);
st->print_cr("# MachUEPNode");
- if (UseCompressedClassPointers) {
- st->print_cr("\tlwu t1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tlwu t2, [t0 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
- } else {
- st->print_cr("\tld t1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tld t2, [t0 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
- }
+ st->print_cr("\tlwu t1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+ st->print_cr("\tlwu t2, [t0 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
st->print_cr("\tbeq t1, t2, ic_hit");
st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check");
st->print_cr("\tic_hit:");
@@ -2060,11 +2055,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
return false;
}
+#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
-// This function is used on startup to build the trampoline stubs in
-// generateOptoStub. Registers not mentioned will be killed by the VM
-// call in the trampoline, and arguments in those registers not be
-// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
return
@@ -2085,11 +2077,7 @@ bool Matcher::can_be_java_arg(int reg)
reg == F16_num || reg == F16_H_num ||
reg == F17_num || reg == F17_H_num;
}
-
-bool Matcher::is_spillable_arg(int reg)
-{
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -2118,10 +2106,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.size() : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
- return false;
-}
-
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask::EMPTY;
@@ -2274,7 +2258,7 @@ encode %{
} else if (rtype == relocInfo::metadata_type) {
__ mov_metadata(dst_reg, (Metadata*)con);
} else {
- assert(rtype == relocInfo::none, "unexpected reloc type");
+ assert(rtype == relocInfo::none || rtype == relocInfo::external_word_type, "unexpected reloc type");
__ mv(dst_reg, $src$$constant);
}
}
@@ -2559,11 +2543,6 @@ frame %{
// Compiled code's Frame Pointer
frame_pointer(R2);
- // Interpreter stores its frame pointer in a register which is
- // stored to the stack by I2CAdaptors.
- // I2CAdaptors convert from interpreted java to compiled java.
- interpreter_frame_pointer(R8);
-
// Stack alignment requirement
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
@@ -8168,6 +8147,22 @@ instruct unnecessary_membar_rvtso() %{
ins_pipe(real_empty);
%}
+instruct membar_storeload_rvtso() %{
+ predicate(UseZtso);
+ match(MemBarStoreLoad);
+ ins_cost(VOLATILE_REF_COST);
+
+ format %{ "#@membar_storeload_rvtso\n\t"
+ "fence w, r"%}
+
+ ins_encode %{
+ __ block_comment("membar_storeload_rvtso");
+ __ membar(MacroAssembler::StoreLoad);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
instruct membar_volatile_rvtso() %{
predicate(UseZtso);
match(MemBarVolatile);
@@ -8198,6 +8193,22 @@ instruct unnecessary_membar_volatile_rvtso() %{
ins_pipe(real_empty);
%}
+instruct membar_full_rvtso() %{
+ predicate(UseZtso);
+ match(MemBarFull);
+ ins_cost(VOLATILE_REF_COST);
+
+ format %{ "#@membar_full_rvtso\n\t"
+ "fence rw, rw" %}
+
+ ins_encode %{
+ __ block_comment("membar_full_rvtso");
+ __ membar(MacroAssembler::AnyAny);
+ %}
+
+ ins_pipe(pipe_slow);
+%}
+
// RVWMO
instruct membar_aqcuire_rvwmo() %{
@@ -8247,6 +8258,22 @@ instruct membar_storestore_rvwmo() %{
ins_pipe(pipe_serial);
%}
+instruct membar_storeload_rvwmo() %{
+ predicate(!UseZtso);
+ match(MemBarStoreLoad);
+ ins_cost(VOLATILE_REF_COST);
+
+ format %{ "#@membar_storeload_rvwmo\n\t"
+ "fence w, r"%}
+
+ ins_encode %{
+ __ block_comment("membar_storeload_rvwmo");
+ __ membar(MacroAssembler::StoreLoad);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
instruct membar_volatile_rvwmo() %{
predicate(!UseZtso);
match(MemBarVolatile);
@@ -8291,6 +8318,22 @@ instruct unnecessary_membar_volatile_rvwmo() %{
ins_pipe(real_empty);
%}
+instruct membar_full_rvwmo() %{
+ predicate(!UseZtso);
+ match(MemBarFull);
+ ins_cost(VOLATILE_REF_COST);
+
+ format %{ "#@membar_full_rvwmo\n\t"
+ "fence rw, rw" %}
+
+ ins_encode %{
+ __ block_comment("membar_full_rvwmo");
+ __ membar(MacroAssembler::AnyAny);
+ %}
+
+ ins_pipe(pipe_serial);
+%}
+
instruct spin_wait() %{
predicate(UseZihintpause);
match(OnSpinWait);
diff --git a/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp b/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
index f977d759d20..890e354fd27 100644
--- a/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
+++ b/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
@@ -29,28 +29,32 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 0) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 10000) \
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 2000) \
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 45000) \
do_stub(compiler, compare_long_string_LL) \
do_arch_entry(riscv, compiler, compare_long_string_LL, \
@@ -81,7 +85,8 @@
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 20000 ZGC_ONLY(+10000)) \
do_stub(final, copy_byte_f) \
do_arch_entry(riscv, final, copy_byte_f, copy_byte_f, \
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 127ac9f6951..4656b5c0d41 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2025, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -3070,8 +3070,7 @@ class StubGenerator: public StubCodeGenerator {
const Register tmp = x30, tmpLval = x12;
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
#ifdef ASSERT
if (AvoidUnalignedAccesses) {
@@ -3128,8 +3127,7 @@ class StubGenerator: public StubCodeGenerator {
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x12;
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
- assert((base_offset % (UseCompactObjectHeaders ? 4 :
- (UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
+ assert((base_offset % (UseCompactObjectHeaders ? 4 : 8)) == 0, "Must be");
Register strU = isLU ? str2 : str1,
strL = isLU ? str1 : str2,
@@ -7350,7 +7348,7 @@ static const int64_t right_3_bits = right_n_bits(3);
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -7374,6 +7372,6 @@ static const int64_t right_3_bits = right_n_bits(3);
}
}; // end class declaration
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
index 2aac95d71fa..b7f69eff9fa 100644
--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
@@ -42,8 +42,12 @@
#define DEFINE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) = CAST_FROM_FN_PTR(address, init_function);
-STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT)
+#define DEFINE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) [count] ;
+STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT, DEFINE_ARCH_ENTRY_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_ARRAY
#undef DEFINE_ARCH_ENTRY_INIT
#undef DEFINE_ARCH_ENTRY
@@ -501,3 +505,9 @@ ATTRIBUTE_ALIGNED(4096) juint StubRoutines::riscv::_crc_table[] =
0x751997d0UL, 0x00000001UL,
0xccaa009eUL, 0x00000000UL,
};
+
+#if INCLUDE_CDS
+// nothing to do for riscv
+void StubRoutines::init_AOTAddressTable() {
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
index 2c4e7210413..ec67a338052 100644
--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
@@ -61,9 +61,13 @@ class riscv {
#define DECLARE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DECLARE_ARCH_ENTRY(arch, blob_name, stub_name, field_name, getter_name)
-private:
- STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT)
+#define DECLARE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address STUB_FIELD_NAME(field_name) [count] ;
+private:
+ STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT, DECLARE_ARCH_ENTRY_ARRAY)
+
+#undef DECLARE_ARCH_ENTRY_ARRAY
#undef DECLARE_ARCH_ENTRY_INIT
#undef DECLARE_ARCH_ENTRY
@@ -79,8 +83,12 @@ private:
#define DEFINE_ARCH_ENTRY_GETTER_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DEFINE_ARCH_ENTRY_GETTER(arch, blob_name, stub_name, field_name, getter_name)
- STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT)
+#define DEFINE_ARCH_ENTRY_GETTER_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address getter_name(int idx) { return STUB_FIELD_NAME(field_name) [idx] ; }
+ STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT, DEFINE_ARCH_ENTRY_GETTER_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_GETTER_ARRAY
#undef DEFINE_ARCH_ENTRY_GETTER_INIT
#undef DEFINE_ARCH_ENTRY_GETTER
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
index 5a3644f70bb..5cc725e3af4 100644
--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
@@ -708,7 +708,6 @@ void TemplateTable::index_check(Register array, Register index) {
__ mv(x11, index);
}
Label ok;
- __ sext(index, index, 32);
__ bltu(index, length, ok);
__ mv(x13, array);
__ mv(t1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
@@ -1052,7 +1051,7 @@ void TemplateTable::aastore() {
transition(vtos, vtos);
// stack: ..., array, index, value
__ ld(x10, at_tos()); // value
- __ ld(x12, at_tos_p1()); // index
+ __ lw(x12, at_tos_p1()); // index
__ ld(x13, at_tos_p2()); // array
index_check(x13, x12); // kills x11
@@ -1462,9 +1461,9 @@ void TemplateTable::iinc() {
transition(vtos, vtos);
__ load_signed_byte(x11, at_bcp(2)); // get constant
locals_index(x12);
- __ ld(x10, iaddress(x12, x10, _masm));
+ __ lw(x10, iaddress(x12, x10, _masm));
__ addw(x10, x10, x11);
- __ sd(x10, iaddress(x12, t0, _masm));
+ __ sw(x10, iaddress(x12, t0, _masm));
}
void TemplateTable::wide_iinc() {
@@ -1477,9 +1476,9 @@ void TemplateTable::wide_iinc() {
__ orr(x11, x11, t1);
locals_index_wide(x12);
- __ ld(x10, iaddress(x12, t0, _masm));
+ __ lw(x10, iaddress(x12, t0, _masm));
__ addw(x10, x10, x11);
- __ sd(x10, iaddress(x12, t0, _masm));
+ __ sw(x10, iaddress(x12, t0, _masm));
}
void TemplateTable::convert() {
@@ -3280,7 +3279,7 @@ void TemplateTable::invokevirtual_helper(Register index,
__ load_klass(x10, recv);
// profile this call
- __ profile_virtual_call(x10, xlocals, x13);
+ __ profile_virtual_call(x10, xlocals);
// get target Method & entry point
__ lookup_virtual_method(x10, index, method);
@@ -3407,7 +3406,7 @@ void TemplateTable::invokeinterface(int byte_no) {
/*return_method=*/false);
// profile this call
- __ profile_virtual_call(x13, x30, x9);
+ __ profile_virtual_call(x13, x30);
// Get declaring interface class from method, and itable index
__ load_method_holder(x10, xmethod);
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
index 36f0864da0b..3a6415d52bd 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -420,11 +420,6 @@ void VM_Version::c2_initialize() {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
- // UseSHA
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
// AES
if (UseZvkn) {
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
index 03c843efc69..11a88dfedd7 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -55,7 +55,7 @@ class VM_Version : public Abstract_VM_Version {
public:
RVFeatureValue(const char* pretty, int linux_bit_num, bool fstring) :
- _pretty(pretty), _feature_string(fstring), _linux_feature_bit(nth_bit(linux_bit_num)) {
+ _pretty(pretty), _feature_string(fstring), _linux_feature_bit(nth_bit(linux_bit_num)) {
}
virtual void enable_feature(int64_t value = 0) = 0;
virtual void disable_feature() = 0;
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 93d6051aa76..e1d8d062c23 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -2251,9 +2251,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// but not necessarily exactly of type default_type.
NearLabel known_ok, halt;
metadata2reg(default_type->constant_encoding(), tmp);
- if (UseCompressedClassPointers) {
- __ encode_klass_not_null(tmp);
- }
+ __ encode_klass_not_null(tmp);
if (basic_type != T_OBJECT) {
__ cmp_klass(tmp, dst, Z_R1_scratch);
@@ -2540,13 +2538,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
// Get object class.
// Not a safepoint as obj null check happens earlier.
if (op->fast_check()) {
- if (UseCompressedClassPointers) {
- __ load_klass(klass_RInfo, obj);
- __ compareU64_and_branch(k_RInfo, klass_RInfo, Assembler::bcondNotEqual, *failure_target);
- } else {
- __ z_cg(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
- __ branch_optimized(Assembler::bcondNotEqual, *failure_target);
- }
+ __ load_klass(klass_RInfo, obj);
+ __ compareU64_and_branch(k_RInfo, klass_RInfo, Assembler::bcondNotEqual, *failure_target);
// Successful cast, fall through to profile or jump.
} else {
bool need_slow_path = !k->is_loaded() ||
diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
index 993c1a1b552..813143938f9 100644
--- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -107,10 +107,10 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
}
if (len->is_valid()) {
- // Length will be in the klass gap, if one exists.
+ // Length will be in the klass gap.
z_st(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
- } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
- store_klass_gap(Rzero, obj); // Zero klass gap for compressed oops.
+ } else if (!UseCompactObjectHeaders) {
+ store_klass_gap(Rzero, obj); // Zero klass gap.
}
}
diff --git a/src/hotspot/cpu/s390/c1_globals_s390.hpp b/src/hotspot/cpu/s390/c1_globals_s390.hpp
index 25e46cd1509..64cc239800a 100644
--- a/src/hotspot/cpu/s390/c1_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c1_globals_s390.hpp
@@ -51,8 +51,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M);
define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true);
-define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
#endif // !COMPILER2
diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp
index 431a36cda07..eee3a8588c3 100644
--- a/src/hotspot/cpu/s390/c2_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -44,9 +44,8 @@ define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 4);
-define_pd_global(intx, FreqInlineSize, 175);
+define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, InteriorEntryAlignment, 4);
-define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 12000);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
@@ -79,7 +78,4 @@ define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on z/Architecture.
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
#endif // CPU_S390_C2_GLOBALS_S390_HPP
diff --git a/src/hotspot/cpu/s390/compiledIC_s390.cpp b/src/hotspot/cpu/s390/compiledIC_s390.cpp
index 8501a0cb346..43f5d80250e 100644
--- a/src/hotspot/cpu/s390/compiledIC_s390.cpp
+++ b/src/hotspot/cpu/s390/compiledIC_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2019 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -29,9 +29,6 @@
#include "memory/resourceArea.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/safepoint.hpp"
-#ifdef COMPILER2
-#include "opto/matcher.hpp"
-#endif
// ----------------------------------------------------------------------------
@@ -39,7 +36,6 @@
#define __ masm->
address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address mark/* = nullptr*/) {
-#ifdef COMPILER2
// Stub is fixed up when the corresponding call is converted from calling
// compiled code to calling interpreted code.
if (mark == nullptr) {
@@ -55,7 +51,7 @@ address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address ma
__ relocate(static_stub_Relocation::spec(mark));
AddressLiteral meta = __ allocate_metadata_address(nullptr);
- bool success = __ load_const_from_toc(as_Register(Matcher::inline_cache_reg_encode()), meta);
+ bool success = __ load_const_from_toc(Z_inline_cache, meta);
__ set_inst_mark();
AddressLiteral a((address)-1);
@@ -67,10 +63,6 @@ address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address ma
__ z_br(Z_R1);
__ end_a_stub(); // Update current stubs pointer and restore insts_end.
return stub;
-#else
- ShouldNotReachHere();
- return nullptr;
-#endif
}
#undef __
diff --git a/src/hotspot/cpu/s390/downcallLinker_s390.cpp b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
index ccd8002da37..f1c41d05b5c 100644
--- a/src/hotspot/cpu/s390/downcallLinker_s390.cpp
+++ b/src/hotspot/cpu/s390/downcallLinker_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -129,7 +129,7 @@ void DowncallLinker::StubGenerator::generate() {
assert(!_needs_return_buffer, "unexpected needs_return_buffer");
RegSpiller out_reg_spiller(_output_registers);
- int spill_offset = allocated_frame_size;
+ int out_spill_offset = allocated_frame_size;
allocated_frame_size += BytesPerWord;
StubLocations locs;
@@ -153,6 +153,18 @@ void DowncallLinker::StubGenerator::generate() {
GrowableArray out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, _abi._scratch1);
+ // Need to spill for state capturing runtime call.
+ // The area spilled into is distinct from the capture state buffer.
+ RegSpiller in_reg_spiller(out_regs);
+ int in_spill_offset = -1;
+ if (_captured_state_mask != 0) {
+ // The spill area cannot be shared with the out_spill since
+ // spilling needs to happen before the call. Allocate a new
+ // region in the stack for this spill space.
+ in_spill_offset = allocated_frame_size;
+ allocated_frame_size += in_reg_spiller.spill_size_bytes();
+ }
+
#ifndef PRODUCT
LogTarget(Trace, foreign, downcall) lt;
if (lt.is_enabled()) {
@@ -192,6 +204,21 @@ void DowncallLinker::StubGenerator::generate() {
arg_shuffle.generate(_masm, shuffle_reg, frame::z_jit_out_preserve_size, _abi._shadow_space_bytes);
__ block_comment("} argument_shuffle");
+ if (_captured_state_mask != 0) {
+ assert(in_spill_offset != -1, "must be");
+ __ block_comment("{ load initial thread local");
+ in_reg_spiller.generate_spill(_masm, in_spill_offset);
+
+ // Copy the contents of the capture state buffer into thread local
+ __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_pre));
+ __ z_lg(Z_ARG1, Address(Z_SP, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
+ __ load_const_optimized(Z_ARG2, _captured_state_mask);
+ __ call(call_target_address);
+
+ in_reg_spiller.generate_fill(_masm, in_spill_offset);
+ __ block_comment("} load initial thread local");
+ }
+
__ call(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
//////////////////////////////////////////////////////////////////////////////
@@ -199,14 +226,14 @@ void DowncallLinker::StubGenerator::generate() {
if (_captured_state_mask != 0) {
__ block_comment("save_thread_local {");
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
- __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state));
+ __ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_post));
__ z_lg(Z_ARG1, Address(Z_SP, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
__ load_const_optimized(Z_ARG2, _captured_state_mask);
__ call(call_target_address);
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
__ block_comment("} save_thread_local");
}
@@ -259,13 +286,13 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_safepoint_poll_slow_path);
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, JavaThread::check_special_condition_for_native_trans));
__ z_lgr(Z_ARG1, Z_thread);
__ call(call_target_address);
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
__ z_bru(L_after_safepoint_poll);
__ block_comment("} L_safepoint_poll_slow_path");
@@ -275,12 +302,12 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_reguard);
// Need to save the native result registers around any runtime calls.
- out_reg_spiller.generate_spill(_masm, spill_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_offset);
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, SharedRuntime::reguard_yellow_pages));
__ call(call_target_address);
- out_reg_spiller.generate_fill(_masm, spill_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_offset);
__ z_bru(L_after_reguard);
diff --git a/src/hotspot/cpu/s390/frame_s390.hpp b/src/hotspot/cpu/s390/frame_s390.hpp
index ad754706367..bcdeec43e1a 100644
--- a/src/hotspot/cpu/s390/frame_s390.hpp
+++ b/src/hotspot/cpu/s390/frame_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -463,7 +463,7 @@
// Accessors
- inline intptr_t* fp() const { return _fp; }
+ inline intptr_t* fp() const { assert_absolute(); return _fp; }
private:
diff --git a/src/hotspot/cpu/s390/frame_s390.inline.hpp b/src/hotspot/cpu/s390/frame_s390.inline.hpp
index dea0e72581f..6fcd36c57d1 100644
--- a/src/hotspot/cpu/s390/frame_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/frame_s390.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -133,10 +133,10 @@ inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) {
// Return unique id for this frame. The id must have a value where we
// can distinguish identity and younger/older relationship. null
-// represents an invalid (incomparable) frame.
+// represents an invalid (incomparable) frame. Should not be called for heap frames.
inline intptr_t* frame::id(void) const {
// Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing.
- return _fp;
+ return real_fp();
}
// Return true if this frame is older (less recent activation) than
diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
index 272136fc28c..617bc7cd00c 100644
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -129,6 +129,57 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
}
}
+static void generate_post_barrier(MacroAssembler* masm,
+ const Register store_addr,
+ const Register new_val,
+ const Register thread,
+ const Register tmp1,
+ const Register tmp2,
+ Label& done,
+ bool new_val_may_be_null) {
+
+ __ block_comment("generate_post_barrier {");
+
+ assert(thread == Z_thread, "must be");
+ assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
+
+ // Does store cross heap regions?
+ if (VM_Version::has_DistinctOpnds()) {
+ __ z_xgrk(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
+ } else {
+ __ z_lgr(tmp1, store_addr);
+ __ z_xgr(tmp1, new_val);
+ }
+ __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
+ __ branch_optimized(Assembler::bcondEqual, done);
+
+ // Crosses regions, storing null?
+ if (new_val_may_be_null) {
+ __ z_ltgr(new_val, new_val);
+ __ z_bre(done);
+ } else {
+#ifdef ASSERT
+ __ z_ltgr(new_val, new_val);
+ __ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
+#endif
+ }
+
+ __ z_srag(tmp1, store_addr, CardTable::card_shift());
+
+ Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
+ __ z_alg(tmp1, card_table_addr); // tmp1 := card address
+
+ if(UseCondCardMark) {
+ __ z_cli(0, tmp1, G1CardTable::clean_card_val());
+ __ branch_optimized(Assembler::bcondNotEqual, done);
+ }
+
+ static_assert(G1CardTable::dirty_card_val() == 0, "must be to use z_mvi");
+ __ z_mvi(0, tmp1, G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
+
+ __ block_comment("} generate_post_barrier");
+}
+
#if defined(COMPILER2)
#undef __
@@ -204,57 +255,6 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
}
-static void generate_post_barrier(MacroAssembler* masm,
- const Register store_addr,
- const Register new_val,
- const Register thread,
- const Register tmp1,
- const Register tmp2,
- Label& done,
- bool new_val_may_be_null) {
-
- __ block_comment("generate_post_barrier {");
-
- assert(thread == Z_thread, "must be");
- assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
-
- // Does store cross heap regions?
- if (VM_Version::has_DistinctOpnds()) {
- __ z_xgrk(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
- } else {
- __ z_lgr(tmp1, store_addr);
- __ z_xgr(tmp1, new_val);
- }
- __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
- __ branch_optimized(Assembler::bcondEqual, done);
-
- // Crosses regions, storing null?
- if (new_val_may_be_null) {
- __ z_ltgr(new_val, new_val);
- __ z_bre(done);
- } else {
-#ifdef ASSERT
- __ z_ltgr(new_val, new_val);
- __ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
-#endif
- }
-
- __ z_srag(tmp1, store_addr, CardTable::card_shift());
-
- Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
- __ z_alg(tmp1, card_table_addr); // tmp1 := card address
-
- if(UseCondCardMark) {
- __ z_cli(0, tmp1, G1CardTable::clean_card_val());
- __ branch_optimized(Assembler::bcondNotEqual, done);
- }
-
- static_assert(G1CardTable::dirty_card_val() == 0, "must be to use z_mvi");
- __ z_mvi(0, tmp1, G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
-
- __ block_comment("} generate_post_barrier");
-}
-
void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register store_addr,
Register new_val,
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
index c6f5a4e119c..9fac231df47 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -169,6 +169,11 @@ void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Re
__ z_lg(obj, 0, obj); // Resolve (untagged) jobject.
}
+void BarrierSetAssembler::try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
+ // Load the oop from the weak handle.
+ __ z_lg(obj, Address(obj));
+}
+
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
__ align(4, __ offset() + OFFSET_TO_PATCHABLE_DATA); // must align the following block which requires atomic updates
diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
index 65db915b672..8e76ec2f4b4 100644
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -58,6 +58,11 @@ public:
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+ // Can be used in nmethods including native wrappers.
+ // Attention: obj will only be valid until next safepoint (no SATB barrier).
+ // (other platforms currently use it for C2 only: try_resolve_weak_handle_in_c2)
+ virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
+
virtual void nmethod_entry_barrier(MacroAssembler* masm);
virtual void barrier_stubs_init() {}
diff --git a/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp
index a0da6ebe682..9bb7f63ff31 100644
--- a/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/gc/shared/cardTableBarrierSetAssembler_s390.cpp
@@ -83,8 +83,7 @@ void CardTableBarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Registe
void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count,
bool do_return) {
- CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set());
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
NearLabel doXC, done;
assert_different_registers(Z_R0, Z_R1, addr, count);
@@ -105,7 +104,7 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
__ add2reg_with_index(count, -BytesPerHeapOop, count, addr);
// Get base address of card table.
- __ load_const_optimized(Z_R1, (address)ct->byte_map_base());
+ __ load_const_optimized(Z_R1, (address)ctbs->card_table_base_const());
// count = (count>>shift) - (addr>>shift)
__ z_srlg(addr, addr, CardTable::card_shift());
@@ -179,13 +178,12 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register store_addr, Register tmp) {
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.
- CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set());
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
assert_different_registers(store_addr, tmp);
__ z_srlg(store_addr, store_addr, CardTable::card_shift());
- __ load_absolute_address(tmp, (address)ct->byte_map_base());
+ __ load_absolute_address(tmp, (address)ctbs->card_table_base_const());
__ z_agr(store_addr, tmp);
__ z_mvi(0, store_addr, CardTable::dirty_card_val());
}
diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp
index 07987ea3469..d110443adf8 100644
--- a/src/hotspot/cpu/s390/globals_s390.hpp
+++ b/src/hotspot/cpu/s390/globals_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -43,7 +43,7 @@ define_pd_global(size_t, CodeCacheSegmentSize, 256);
// Ideally, this is 256 (cache line size). This keeps code end data
// on separate lines. But we reduced it to 64 since 256 increased
// code size significantly by padding nops between IVC and second UEP.
-define_pd_global(intx, CodeEntryAlignment, 64);
+define_pd_global(uint, CodeEntryAlignment, 64);
define_pd_global(intx, OptoLoopAlignment, 2);
define_pd_global(intx, InlineSmallCode, 2000);
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index a80ca26239b..d5239898dd7 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -1259,27 +1259,15 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
Register mdp,
- Register reg2,
- bool receiver_can_be_null) {
+ Register reg2) {
if (ProfileInterpreter) {
NearLabel profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
- NearLabel skip_receiver_profile;
- if (receiver_can_be_null) {
- NearLabel not_null;
- compareU64_and_branch(receiver, (intptr_t)0L, bcondNotEqual, not_null);
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- z_bru(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
record_klass_in_profile(receiver, mdp, reg2);
- bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.hpp b/src/hotspot/cpu/s390/interp_masm_s390.hpp
index d981f9ea01e..b816185b065 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.hpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -296,8 +296,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_call(Register mdp);
void profile_final_call(Register mdp);
void profile_virtual_call(Register receiver, Register mdp,
- Register scratch2,
- bool receiver_can_be_null = false);
+ Register scratch2);
void profile_ret(Register return_bci, Register mdp);
void profile_null_seen(Register mdp);
void profile_typecheck(Register mdp, Register klass, Register scratch);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index f35e18c7398..de3608e74ba 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
- * Copyright 2024 IBM Corporation. All rights reserved.
+ * Copyright 2024, 2026 IBM Corporation. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -44,6 +44,7 @@
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
+#include "runtime/objectMonitorTable.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
@@ -1236,7 +1237,6 @@ void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
// Load narrow klass constant, compression required.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
- assert(UseCompressedClassPointers, "must be on to call this method");
narrowKlass encoded_k = CompressedKlassPointers::encode(k);
load_const_32to64(t, encoded_k, false /*sign_extend*/);
}
@@ -1254,7 +1254,6 @@ void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2)
// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
- assert(UseCompressedClassPointers, "must be on to call this method");
narrowKlass encoded_k = CompressedKlassPointers::encode(klass2);
Assembler::z_clfi(klass1, encoded_k);
@@ -1347,8 +1346,6 @@ int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
- assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
-
narrowKlass nk = CompressedKlassPointers::encode(k);
return patch_load_const_32to64(pos, nk);
}
@@ -1363,8 +1360,6 @@ int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
- assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
-
narrowKlass nk = CompressedKlassPointers::encode(k);
return patch_compare_immediate_32(pos, nk);
}
@@ -2234,10 +2229,8 @@ int MacroAssembler::ic_check(int end_alignment) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(R1_scratch, R2_receiver);
- } else if (UseCompressedClassPointers) {
- z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes()));
} else {
- z_lg(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes()));
+ z_llgf(R1_scratch, Address(R2_receiver, oopDesc::klass_offset_in_bytes()));
}
z_cg(R1_scratch, Address(R9_data, in_bytes(CompiledICData::speculated_klass_offset())));
z_bre(success);
@@ -3915,7 +3908,6 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
address base = CompressedKlassPointers::base();
int shift = CompressedKlassPointers::shift();
bool need_zero_extend = base != nullptr;
- assert(UseCompressedClassPointers, "only for compressed klass ptrs");
BLOCK_COMMENT("cKlass encoder {");
@@ -4012,7 +4004,6 @@ int MacroAssembler::instr_size_for_decode_klass_not_null() {
address base = CompressedKlassPointers::base();
int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */
int addbase_size = 0;
- assert(UseCompressedClassPointers, "only for compressed klass ptrs");
if (base != nullptr) {
unsigned int base_h = ((unsigned long)base)>>32;
@@ -4042,7 +4033,6 @@ void MacroAssembler::decode_klass_not_null(Register dst) {
address base = CompressedKlassPointers::base();
int shift = CompressedKlassPointers::shift();
int beg_off = offset();
- assert(UseCompressedClassPointers, "only for compressed klass ptrs");
BLOCK_COMMENT("cKlass decoder (const size) {");
@@ -4084,7 +4074,6 @@ void MacroAssembler::decode_klass_not_null(Register dst) {
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
address base = CompressedKlassPointers::base();
int shift = CompressedKlassPointers::shift();
- assert(UseCompressedClassPointers, "only for compressed klass ptrs");
BLOCK_COMMENT("cKlass decoder {");
@@ -4124,13 +4113,9 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
}
void MacroAssembler::load_klass(Register klass, Address mem) {
- if (UseCompressedClassPointers) {
- z_llgf(klass, mem);
- // Attention: no null check here!
- decode_klass_not_null(klass);
- } else {
- z_lg(klass, mem);
- }
+ z_llgf(klass, mem);
+ // Attention: no null check here!
+ decode_klass_not_null(klass);
}
// Loads the obj's Klass* into dst.
@@ -4153,10 +4138,8 @@ void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
assert_different_registers(klass, obj, tmp);
load_narrow_klass_compact(tmp, obj);
z_cr(klass, tmp);
- } else if (UseCompressedClassPointers) {
- z_c(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
} else {
- z_cg(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+ z_c(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
}
BLOCK_COMMENT("} cmp_klass");
}
@@ -4169,12 +4152,9 @@ void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Regi
load_narrow_klass_compact(tmp1, obj1);
load_narrow_klass_compact(tmp2, obj2);
z_cr(tmp1, tmp2);
- } else if (UseCompressedClassPointers) {
+ } else {
z_l(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
z_c(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes()));
- } else {
- z_lg(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
- z_cg(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes()));
}
BLOCK_COMMENT("} cmp_klasses_from_objects");
}
@@ -4183,36 +4163,28 @@ void MacroAssembler::load_klass(Register klass, Register src_oop) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(klass, src_oop);
decode_klass_not_null(klass);
- } else if (UseCompressedClassPointers) {
+ } else {
z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
decode_klass_not_null(klass);
- } else {
- z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
}
}
void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
assert(!UseCompactObjectHeaders, "Don't use with compact headers");
- if (UseCompressedClassPointers) {
- assert_different_registers(dst_oop, klass, Z_R0);
- if (ck == noreg) ck = klass;
- encode_klass_not_null(ck, klass);
- z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
- } else {
- z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
- }
+ assert_different_registers(dst_oop, klass, Z_R0);
+ if (ck == noreg) ck = klass;
+ encode_klass_not_null(ck, klass);
+ z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}
void MacroAssembler::store_klass_gap(Register s, Register d) {
assert(!UseCompactObjectHeaders, "Don't use with compact headers");
- if (UseCompressedClassPointers) {
- assert(s != d, "not enough registers");
- // Support s = noreg.
- if (s != noreg) {
- z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
- } else {
- z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
- }
+ assert(s != d, "not enough registers");
+ // Support s = noreg.
+ if (s != noreg) {
+ z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
+ } else {
+ z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
}
}
@@ -4226,67 +4198,64 @@ void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rba
BLOCK_COMMENT("compare klass ptr {");
- if (UseCompressedClassPointers) {
- const int shift = CompressedKlassPointers::shift();
- address base = CompressedKlassPointers::base();
+ const int shift = CompressedKlassPointers::shift();
+ address base = CompressedKlassPointers::base();
- if (UseCompactObjectHeaders) {
- assert(shift >= 3, "cKlass encoder detected bad shift");
- } else {
- assert((shift == 0) || (shift == 3), "cKlass encoder detected bad shift");
- }
- assert_different_registers(Rop1, Z_R0);
- assert_different_registers(Rop1, Rbase, Z_R1);
-
- // First encode register oop and then compare with cOop in memory.
- // This sequence saves an unnecessary cOop load and decode.
- if (base == nullptr) {
- if (shift == 0) {
- z_cl(Rop1, disp, Rbase); // Unscaled
- } else {
- z_srlg(Z_R0, Rop1, shift); // ZeroBased
- z_cl(Z_R0, disp, Rbase);
- }
- } else { // HeapBased
-#ifdef ASSERT
- bool used_R0 = true;
- bool used_R1 = true;
-#endif
- Register current = Rop1;
- Label done;
-
- if (maybenull) { // null pointer must be preserved!
- z_ltgr(Z_R0, current);
- z_bre(done);
- current = Z_R0;
- }
-
- unsigned int base_h = ((unsigned long)base)>>32;
- unsigned int base_l = (unsigned int)((unsigned long)base);
- if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
- lgr_if_needed(Z_R0, current);
- z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half.
- } else if ((base_h == 0) && (base_l != 0)) {
- lgr_if_needed(Z_R0, current);
- z_agfi(Z_R0, -(int)base_l);
- } else {
- int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
- add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
- }
-
- if (shift != 0) {
- z_srlg(Z_R0, Z_R0, shift);
- }
- bind(done);
- z_cl(Z_R0, disp, Rbase);
-#ifdef ASSERT
- if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
- if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
-#endif
- }
+ if (UseCompactObjectHeaders) {
+ assert(shift >= 3, "cKlass encoder detected bad shift");
} else {
- z_clg(Rop1, disp, Z_R0, Rbase);
+ assert((shift == 0) || (shift == 3), "cKlass encoder detected bad shift");
}
+ assert_different_registers(Rop1, Z_R0);
+ assert_different_registers(Rop1, Rbase, Z_R1);
+
+ // First encode register oop and then compare with cOop in memory.
+ // This sequence saves an unnecessary cOop load and decode.
+ if (base == nullptr) {
+ if (shift == 0) {
+ z_cl(Rop1, disp, Rbase); // Unscaled
+ } else {
+ z_srlg(Z_R0, Rop1, shift); // ZeroBased
+ z_cl(Z_R0, disp, Rbase);
+ }
+ } else { // HeapBased
+#ifdef ASSERT
+ bool used_R0 = true;
+ bool used_R1 = true;
+#endif
+ Register current = Rop1;
+ Label done;
+
+ if (maybenull) { // null pointer must be preserved!
+ z_ltgr(Z_R0, current);
+ z_bre(done);
+ current = Z_R0;
+ }
+
+ unsigned int base_h = ((unsigned long)base)>>32;
+ unsigned int base_l = (unsigned int)((unsigned long)base);
+ if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
+ lgr_if_needed(Z_R0, current);
+ z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half.
+ } else if ((base_h == 0) && (base_l != 0)) {
+ lgr_if_needed(Z_R0, current);
+ z_agfi(Z_R0, -(int)base_l);
+ } else {
+ int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
+ add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
+ }
+
+ if (shift != 0) {
+ z_srlg(Z_R0, Z_R0, shift);
+ }
+ bind(done);
+ z_cl(Z_R0, disp, Rbase);
+#ifdef ASSERT
+ if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
+ if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
+#endif
+ }
+
BLOCK_COMMENT("} compare klass ptr");
}
@@ -6372,45 +6341,55 @@ void MacroAssembler::compiler_fast_lock_object(Register obj, Register box, Regis
if (!UseObjectMonitorTable) {
assert(tmp1_monitor == mark, "should be the same here");
} else {
+ const Register tmp1_bucket = tmp1;
+ const Register hash = Z_R0_scratch;
NearLabel monitor_found;
- // load cache address
- z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));
+ // Save the mark, we might need it to extract the hash.
+ z_lgr(hash, mark);
- const int num_unrolled = 2;
+ // Look for the monitor in the om_cache.
+
+ ByteSize cache_offset = JavaThread::om_cache_oops_offset();
+ ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
+ const int num_unrolled = OMCache::CAPACITY;
for (int i = 0; i < num_unrolled; i++) {
- z_cg(obj, Address(tmp1));
+ z_lg(tmp1_monitor, Address(Z_thread, cache_offset + monitor_offset));
+ z_cg(obj, Address(Z_thread, cache_offset));
z_bre(monitor_found);
- add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+ cache_offset = cache_offset + OMCache::oop_to_oop_difference();
}
- NearLabel loop;
- // Search for obj in cache
+ // Get the hash code.
+ z_srlg(hash, hash, markWord::hash_shift);
- bind(loop);
+ // Get the table and calculate the bucket's address.
+ load_const_optimized(tmp2, ObjectMonitorTable::current_table_address());
+ z_lg(tmp2, Address(tmp2));
+ z_ng(hash, Address(tmp2, ObjectMonitorTable::table_capacity_mask_offset()));
+ z_lg(tmp1_bucket, Address(tmp2, ObjectMonitorTable::table_buckets_offset()));
+ z_sllg(hash, hash, LogBytesPerWord);
+ z_agr(tmp1_bucket, hash);
- // check for match.
- z_cg(obj, Address(tmp1));
- z_bre(monitor_found);
+ // Read the monitor from the bucket.
+ z_lg(tmp1_monitor, Address(tmp1_bucket));
- // search until null encountered, guaranteed _null_sentinel at end.
- add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
- z_cghsi(0, tmp1, 0);
- z_brne(loop); // if not EQ to 0, go for another loop
+ // Check if the monitor in the bucket is special (empty, tombstone or removed).
+ z_clgfi(tmp1_monitor, ObjectMonitorTable::SpecialPointerValues::below_is_special);
+ z_brl(slow_path);
- // we reached to the end, cache miss
- z_ltgr(obj, obj); // set CC to NE
- z_bru(slow_path);
+ // Check if object matches.
+ z_lg(tmp2, Address(tmp1_monitor, ObjectMonitor::object_offset()));
+ BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs_asm->try_resolve_weak_handle(this, tmp2, Z_R0_scratch, slow_path);
+ z_cgr(obj, tmp2);
+ z_brne(slow_path);
- // cache hit
bind(monitor_found);
- z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
}
NearLabel monitor_locked;
// lock the monitor
- // mark contains the tagged ObjectMonitor*.
- const Register tagged_monitor = mark;
const Register zero = tmp2;
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value));
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index da24ae80d45..32e484d4790 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* Copyright (c) 2024 IBM Corporation. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -842,8 +842,7 @@ class MacroAssembler: public Assembler {
void store_klass(Register klass, Register dst_oop, Register ck = noreg); // Klass will get compressed if ck not provided.
void store_klass_gap(Register s, Register dst_oop);
void load_narrow_klass_compact(Register dst, Register src);
- // Compares the Klass pointer of an object to a given Klass (which might be narrow,
- // depending on UseCompressedClassPointers).
+ // Compares the narrow Klass pointer of an object to a given narrow Klass
void cmp_klass(Register klass, Register obj, Register tmp);
// Compares the Klass pointer of two objects obj1 and obj2. Result is in the condition flags.
// Uses tmp1 and tmp2 as temporary registers.
diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp
index 99461e33e3c..b04a6566d41 100644
--- a/src/hotspot/cpu/s390/matcher_s390.hpp
+++ b/src/hotspot/cpu/s390/matcher_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -82,7 +82,6 @@
static bool narrow_klass_use_complex_address() {
NOT_LP64(ShouldNotCallThis());
- assert(UseCompressedClassPointers, "only for compressed klass code");
// TODO HS25: z port if (MatchDecodeNodes) return true;
return false;
}
diff --git a/src/hotspot/cpu/s390/methodHandles_s390.cpp b/src/hotspot/cpu/s390/methodHandles_s390.cpp
index e3de6d911be..dfb8ce09b27 100644
--- a/src/hotspot/cpu/s390/methodHandles_s390.cpp
+++ b/src/hotspot/cpu/s390/methodHandles_s390.cpp
@@ -120,16 +120,12 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind,
__ z_nilf(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
__ compare32_and_branch(temp, constant(ref_kind), Assembler::bcondEqual, L);
- {
- char *buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
-
- jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
- if (ref_kind == JVM_REF_invokeVirtual || ref_kind == JVM_REF_invokeSpecial) {
- // Could do this for all ref_kinds, but would explode assembly code size.
- trace_method_handle(_masm, buf);
- }
- __ stop(buf);
+ const char* msg = ref_kind_to_verify_msg(ref_kind);
+ if (ref_kind == JVM_REF_invokeVirtual || ref_kind == JVM_REF_invokeSpecial) {
+ // Could do this for all ref_kinds, but would explode assembly code size.
+ trace_method_handle(_masm, msg);
}
+ __ stop(msg);
BLOCK_COMMENT("} verify_ref_kind");
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index 19bd3620228..2208a197ac9 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2017, 2024 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@@ -1890,10 +1890,8 @@ const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
-// Return whether or not this register is ever used as an argument. This
-// function is used on startup to build the trampoline stubs in generateOptoStub.
-// Registers not mentioned will be killed by the VM call in the trampoline, and
-// arguments in those registers not be available to the callee.
+#ifdef ASSERT
+// Return whether or not this register is ever used as an argument.
bool Matcher::can_be_java_arg(int reg) {
// We return true for all registers contained in z_iarg_reg[] and
// z_farg_reg[] and their virtual halves.
@@ -1917,10 +1915,7 @@ bool Matcher::can_be_java_arg(int reg) {
return false;
}
-
-bool Matcher::is_spillable_arg(int reg) {
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -1934,10 +1929,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? 15 : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
- return false;
-}
-
// Register for DIVI projection of divmodI
const RegMask& Matcher::divI_proj_mask() {
return _Z_RARG4_INT_REG_mask;
@@ -2606,13 +2597,6 @@ frame %{
// z/Architecture stack pointer
frame_pointer(Z_R15); // Z_SP
- // Interpreter stores its frame pointer in a register which is
- // stored to the stack by I2CAdaptors. I2CAdaptors convert from
- // interpreted java to compiled java.
- //
- // Z_state holds pointer to caller's cInterpreter.
- interpreter_frame_pointer(Z_R7); // Z_state
-
// Use alignment_in_bytes instead of log_2_of_alignment_in_bits.
stack_alignment(frame::alignment_in_bytes);
@@ -5251,6 +5235,15 @@ instruct membar_release_lock() %{
ins_pipe(pipe_class_dummy);
%}
+instruct membar_storeload() %{
+ match(MemBarStoreLoad);
+ ins_cost(4 * MEMORY_REF_COST);
+ size(2);
+ format %{ "MEMBAR-storeload" %}
+ ins_encode %{ __ z_fence(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
instruct membar_volatile() %{
match(MemBarVolatile);
ins_cost(4 * MEMORY_REF_COST);
@@ -5270,6 +5263,15 @@ instruct unnecessary_membar_volatile() %{
ins_pipe(pipe_class_dummy);
%}
+instruct membar_full() %{
+ match(MemBarFull);
+ ins_cost(4 * MEMORY_REF_COST);
+ size(2);
+ format %{ "MEMBAR-full" %}
+ ins_encode %{ __ z_fence(); %}
+ ins_pipe(pipe_class_dummy);
+%}
+
instruct membar_CPUOrder() %{
match(MemBarCPUOrder);
ins_cost(0);
diff --git a/src/hotspot/cpu/s390/stubDeclarations_s390.hpp b/src/hotspot/cpu/s390/stubDeclarations_s390.hpp
index c3ad3cefeb9..d0e26beedab 100644
--- a/src/hotspot/cpu/s390/stubDeclarations_s390.hpp
+++ b/src/hotspot/cpu/s390/stubDeclarations_s390.hpp
@@ -29,28 +29,32 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 0) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 20000) \
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 2000) \
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 20000 ) \
do_stub(compiler, partial_subtype_check) \
do_arch_entry(zarch, compiler, partial_subtype_check, \
@@ -60,7 +64,8 @@
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 20000) \
diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
index 2aa365be999..3f16312eb48 100644
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp
@@ -3422,7 +3422,7 @@ class StubGenerator: public StubCodeGenerator {
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -3479,6 +3479,6 @@ class StubGenerator: public StubCodeGenerator {
};
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
diff --git a/src/hotspot/cpu/s390/stubRoutines_s390.cpp b/src/hotspot/cpu/s390/stubRoutines_s390.cpp
index 6feb20f9604..eda0ebfdecc 100644
--- a/src/hotspot/cpu/s390/stubRoutines_s390.cpp
+++ b/src/hotspot/cpu/s390/stubRoutines_s390.cpp
@@ -40,8 +40,12 @@
#define DEFINE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) = CAST_FROM_FN_PTR(address, init_function);
-STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT)
+#define DEFINE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) [idx] ;
+STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT, DEFINE_ARCH_ENTRY_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_ARRAY
#undef DEFINE_ARCH_ENTRY_INIT
#undef DEFINE_ARCH_ENTRY
@@ -736,3 +740,9 @@ juint StubRoutines::zarch::_crc32c_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = {
}
#endif
};
+
+#if INCLUDE_CDS
+// nothing to do for s390
+void StubRoutines::init_AOTAddressTable() {
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/s390/stubRoutines_s390.hpp b/src/hotspot/cpu/s390/stubRoutines_s390.hpp
index 0a07efae46c..e575115b731 100644
--- a/src/hotspot/cpu/s390/stubRoutines_s390.hpp
+++ b/src/hotspot/cpu/s390/stubRoutines_s390.hpp
@@ -81,9 +81,13 @@ class zarch {
#define DECLARE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DECLARE_ARCH_ENTRY(arch, blob_name, stub_name, field_name, getter_name)
-private:
- STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT)
+#define DECLARE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address STUB_FIELD_NAME(field_name) [count] ;
+private:
+ STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT, DECLARE_ARCH_ENTRY_ARRAY)
+
+#undef DECLARE_ARCH_ENTRY_ARRAY
#undef DECLARE_ARCH_ENTRY_INIT
#undef DECLARE_ARCH_ENTRY
@@ -108,8 +112,12 @@ private:
#define DEFINE_ARCH_ENTRY_GETTER_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DEFINE_ARCH_ENTRY_GETTER(arch, blob_name, stub_name, field_name, getter_name)
- STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT)
+#define DEFINE_ARCH_ENTRY_GETTER_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address getter_name(int idx) { return STUB_FIELD_NAME(field_name) [idx] ; }
+ STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT, DEFINE_ARCH_ENTRY_GETTER_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_GETTER_ARRAY
#undef DEFINE_ARCH_ENTRY_GETTER_INIT
#undef DEFINE_ARCH_ENTRY_GETTER
diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp
index 7f5b4870aab..7e9000991ca 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -289,10 +289,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
if (UseSecondarySupersTable && VM_Version::get_model_index() < 5 /* z196/z11 */) {
if (!FLAG_IS_DEFAULT(UseSecondarySupersTable)) {
warning("UseSecondarySupersTable requires z196 or later.");
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index cbc5c6988d4..a4f2968f0d1 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -3472,7 +3472,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_int16(0x6F, (0xC0 | encode));
}
-void Assembler::vmovw(XMMRegister dst, Register src) {
+void Assembler::evmovw(XMMRegister dst, Register src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@@ -3480,7 +3480,7 @@ void Assembler::vmovw(XMMRegister dst, Register src) {
emit_int16(0x6E, (0xC0 | encode));
}
-void Assembler::vmovw(Register dst, XMMRegister src) {
+void Assembler::evmovw(Register dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@@ -3488,6 +3488,36 @@ void Assembler::vmovw(Register dst, XMMRegister src) {
emit_int16(0x7E, (0xC0 | encode));
}
+void Assembler::evmovw(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
+ emit_int8(0x6E);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::evmovw(Address dst, XMMRegister src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
+ emit_int8(0x7E);
+ emit_operand(src, dst, 0);
+}
+
+void Assembler::evmovw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
+ emit_int16(0x6E, (0xC0 | encode));
+}
+
void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
@@ -5442,6 +5472,13 @@ void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
emit_int16(0x23, (0xC0 | encode));
}
+void Assembler::pmovzxwd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x33, (0xC0 | encode));
+}
+
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
@@ -7303,6 +7340,42 @@ void Assembler::etzcntq(Register dst, Address src, bool no_flags) {
emit_operand(dst, src, 0);
}
+void Assembler::evucomish(XMMRegister dst, Address src) {
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
+ emit_int8(0x2E);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::evucomish(XMMRegister dst, XMMRegister src) {
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
+ emit_int16(0x2E, (0xC0 | encode));
+}
+
+void Assembler::evucomxsh(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
+ emit_int8(0x2E);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::evucomxsh(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
+ emit_int16(0x2E, (0xC0 | encode));
+}
+
void Assembler::ucomisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -7320,6 +7393,25 @@ void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
emit_int16(0x2E, (0xC0 | encode));
}
+void Assembler::evucomxsd(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x2E);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::evucomxsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x2E, (0xC0 | encode));
+}
+
void Assembler::ucomiss(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -7335,6 +7427,25 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
emit_int16(0x2E, (0xC0 | encode));
}
+void Assembler::evucomxss(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionMark im(this);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x2E);
+ emit_operand(dst, src, 0);
+}
+
+void Assembler::evucomxss(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x2E, (0xC0 | encode));
+}
+
void Assembler::xabort(int8_t imm8) {
emit_int24((unsigned char)0xC6, (unsigned char)0xF8, (imm8 & 0xFF));
}
@@ -8366,30 +8477,6 @@ void Assembler::vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
emit_int16(0x59, (0xC0 | encode));
}
-void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_is_evex_instruction();
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
- emit_int16(0x5F, (0xC0 | encode));
-}
-
-void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
- assert(VM_Version::supports_avx10_2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_is_evex_instruction();
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
- emit_int24(0x53, (0xC0 | encode), imm8);
-}
-
-void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_is_evex_instruction();
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
- emit_int16(0x5D, (0xC0 | encode));
-}
-
void Assembler::vsqrtsh(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -13324,48 +13411,38 @@ bool Assembler::is_demotable(bool no_flags, int dst_enc, int nds_enc) {
return (!no_flags && dst_enc == nds_enc);
}
-void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
- emit_int16(0x5F, (0xC0 | encode));
-}
-
-void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_rex_vex_w_reverted();
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
- emit_int16(0x5F, (0xC0 | encode));
-}
-
-void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
- emit_int16(0x5D, (0xC0 | encode));
-}
-
-void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
+void Assembler::evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x53, (0xC0 | encode), imm8);
+}
+
+void Assembler::evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
+ assert(VM_Version::supports_avx10_2(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}
-void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
- attributes.set_rex_vex_w_reverted();
- int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
- emit_int16(0x5D, (0xC0 | encode));
-}
-
-void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
+void Assembler::evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
- InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 26c57fc2d80..98684752b0c 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1694,8 +1694,11 @@ private:
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
- void vmovw(XMMRegister dst, Register src);
- void vmovw(Register dst, XMMRegister src);
+ void evmovw(XMMRegister dst, Register src);
+ void evmovw(Register dst, XMMRegister src);
+ void evmovw(XMMRegister dst, Address src);
+ void evmovw(Address dst, XMMRegister src);
+ void evmovw(XMMRegister dst, XMMRegister src);
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);
@@ -1965,6 +1968,7 @@ private:
void pmovsxbq(XMMRegister dst, XMMRegister src);
void pmovsxbw(XMMRegister dst, XMMRegister src);
void pmovsxwd(XMMRegister dst, XMMRegister src);
+ void pmovzxwd(XMMRegister dst, XMMRegister src);
void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
@@ -2328,13 +2332,23 @@ private:
void tzcntq(Register dst, Address src);
void etzcntq(Register dst, Address src, bool no_flags);
+ // Unordered Compare Scalar Half-Precision Floating-Point Values and set EFLAGS
+ void evucomish(XMMRegister dst, Address src);
+ void evucomish(XMMRegister dst, XMMRegister src);
+ void evucomxsh(XMMRegister dst, Address src);
+ void evucomxsh(XMMRegister dst, XMMRegister src);
+
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
void ucomisd(XMMRegister dst, XMMRegister src);
+ void evucomxsd(XMMRegister dst, Address src);
+ void evucomxsd(XMMRegister dst, XMMRegister src);
// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void ucomiss(XMMRegister dst, Address src);
void ucomiss(XMMRegister dst, XMMRegister src);
+ void evucomxss(XMMRegister dst, Address src);
+ void evucomxss(XMMRegister dst, XMMRegister src);
void xabort(int8_t imm8);
@@ -2412,11 +2426,6 @@ private:
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
-
void sarxl(Register dst, Register src1, Register src2);
void sarxl(Register dst, Address src1, Register src2);
void sarxq(Register dst, Register src1, Register src2);
@@ -2547,8 +2556,6 @@ private:
void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
- void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsqrtsh(XMMRegister dst, XMMRegister src);
void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);
@@ -2785,9 +2792,9 @@ private:
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// AVX10.2 floating point minmax instructions
- void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
- void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
- void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+ void evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
+ void evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
+ void evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index 37ee9451405..5c05b3702bb 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,7 @@
#include "c1/c1_ValueStack.hpp"
#include "ci/ciArrayKlass.hpp"
#include "ci/ciInstance.hpp"
+#include "code/aotCodeCache.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gc_globals.hpp"
@@ -41,6 +42,7 @@
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/threadIdentifier.hpp"
#include "utilities/powerOfTwo.hpp"
#include "vmreg_x86.inline.hpp"
@@ -69,6 +71,17 @@ static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2],
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], (jlong)UCONST64(0x8000000080000000), (jlong)UCONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], (jlong)UCONST64(0x8000000000000000), (jlong)UCONST64(0x8000000000000000));
+#if INCLUDE_CDS
+// publish external addresses defined in this file
+void LIR_Assembler::init_AOTAddressTable(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(float_signmask_pool);
+ ADD(double_signmask_pool);
+ ADD(float_signflip_pool);
+ ADD(double_signflip_pool);
+#undef ADD
+}
+#endif // INCLUDE_CDS
NEEDS_CLEANUP // remove this definitions ?
const Register SYNC_header = rax; // synchronization header
@@ -76,23 +89,6 @@ const Register SHIFT_count = rcx; // where count for shift operations must be
#define __ _masm->
-
-static void select_different_registers(Register preserve,
- Register extra,
- Register &tmp1,
- Register &tmp2) {
- if (tmp1 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp1 = extra;
- } else if (tmp2 == preserve) {
- assert_different_registers(tmp1, tmp2, extra);
- tmp2 = extra;
- }
- assert_different_registers(preserve, tmp1, tmp2);
-}
-
-
-
static void select_different_registers(Register preserve,
Register extra,
Register &tmp1,
@@ -534,7 +530,29 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod
}
case T_LONG: {
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ address b = c->as_pointer();
+ if (b == (address)ThreadIdentifier::unsafe_offset()) {
+ __ lea(dest->as_register_lo(), ExternalAddress(b));
+ break;
+ }
+ }
+#endif
assert(patch_code == lir_patch_none, "no patching handled here");
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ address b = c->as_pointer();
+ if (b == (address)ThreadIdentifier::unsafe_offset()) {
+ __ lea(dest->as_register_lo(), ExternalAddress(b));
+ break;
+ }
+ if (AOTRuntimeConstants::contains(b)) {
+ __ load_aotrc_address(dest->as_register_lo(), b);
+ break;
+ }
+ }
+#endif
__ movptr(dest->as_register_lo(), (intptr_t)c->as_jlong());
break;
}
@@ -1299,12 +1317,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
} else if (obj == klass_RInfo) {
klass_RInfo = dst;
}
- if (k->is_loaded() && !UseCompressedClassPointers) {
- select_different_registers(obj, dst, k_RInfo, klass_RInfo);
- } else {
- Rtmp1 = op->tmp3()->as_register();
- select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
- }
+ Rtmp1 = op->tmp3()->as_register();
+ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
assert_different_registers(obj, k_RInfo, klass_RInfo);
@@ -1338,12 +1352,8 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
if (op->fast_check()) {
// get object class
// not a safepoint as obj null check happens earlier
- if (UseCompressedClassPointers) {
- __ load_klass(Rtmp1, obj, tmp_load_klass);
- __ cmpptr(k_RInfo, Rtmp1);
- } else {
- __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
- }
+ __ load_klass(Rtmp1, obj, tmp_load_klass);
+ __ cmpptr(k_RInfo, Rtmp1);
__ jcc(Assembler::notEqual, *failure_target);
// successful cast, fall through to profile or jump
} else {
@@ -2641,9 +2651,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// but not necessarily exactly of type default_type.
Label known_ok, halt;
__ mov_metadata(tmp, default_type->constant_encoding());
- if (UseCompressedClassPointers) {
- __ encode_klass_not_null(tmp, rscratch1);
- }
+ __ encode_klass_not_null(tmp, rscratch1);
if (basic_type != T_OBJECT) {
__ cmp_klass(tmp, dst, tmp2);
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
index c4a368b54d8..6f179255e4a 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp
@@ -58,4 +58,7 @@ public:
void store_parameter(jobject c, int offset_from_esp_in_words);
void store_parameter(Metadata* c, int offset_from_esp_in_words);
+#if INCLUDE_CDS
+ void static init_AOTAddressTable(GrowableArray& external_addresses);
+#endif // INCLUDE_CDS
#endif // CPU_X86_C1_LIRASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
index 5459e8df229..f448e4ee17f 100644
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1291,9 +1291,7 @@ void LIRGenerator::do_CheckCast(CheckCast* x) {
}
LIR_Opr reg = rlock_result(x);
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ checkcast(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), info_for_exception, patching_info, stub,
@@ -1313,9 +1311,7 @@ void LIRGenerator::do_InstanceOf(InstanceOf* x) {
}
obj.load_item();
LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
- if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
- tmp3 = new_register(objectType);
- }
+ tmp3 = new_register(objectType);
__ instanceof(reg, obj.result(), x->klass(),
new_register(objectType), new_register(objectType), tmp3,
x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
index 88e2e6c8ba9..7adaea48ff1 100644
--- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,14 +85,11 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
if (UseCompactObjectHeaders) {
movptr(t1, Address(klass, Klass::prototype_header_offset()));
movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1);
- } else if (UseCompressedClassPointers) { // Take care not to kill klass
+ } else { // Take care not to kill klass
movptr(Address(obj, oopDesc::mark_offset_in_bytes()), checked_cast(markWord::prototype().value()));
movptr(t1, klass);
encode_klass_not_null(t1, rscratch1);
movl(Address(obj, oopDesc::klass_offset_in_bytes()), t1);
- } else {
- movptr(Address(obj, oopDesc::mark_offset_in_bytes()), checked_cast(markWord::prototype().value()));
- movptr(Address(obj, oopDesc::klass_offset_in_bytes()), klass);
}
if (len->is_valid()) {
@@ -104,7 +101,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
xorl(t1, t1);
movl(Address(obj, base_offset), t1);
}
- } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
+ } else if (!UseCompactObjectHeaders) {
xorptr(t1, t1);
store_klass_gap(obj, t1);
}
diff --git a/src/hotspot/cpu/x86/c1_globals_x86.hpp b/src/hotspot/cpu/x86/c1_globals_x86.hpp
index 978b233bb63..bb75a31a77c 100644
--- a/src/hotspot/cpu/x86/c1_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_globals_x86.hpp
@@ -41,7 +41,6 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1500 );
define_pd_global(intx, OnStackReplacePercentage, 933 );
-define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
@@ -51,7 +50,6 @@ define_pd_global(bool, ProfileInterpreter, false);
define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1 );
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
-define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 8fc3d18abb1..b4d8aa10de2 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,9 @@
#include "opto/subnode.hpp"
#include "runtime/globals.hpp"
#include "runtime/objectMonitor.hpp"
+#include "runtime/objectMonitorTable.hpp"
#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/powerOfTwo.hpp"
@@ -150,7 +152,7 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
-// we inline both the stack-locking fast path and the inflated fast path.
+// we inline both the lock-stack fast path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
@@ -217,7 +219,6 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
// In the case of failure, the node will branch directly to the
// FailureLabel
-
// obj: object to lock
// box: on-stack box address -- KILLED
// rax: tmp -- KILLED
@@ -286,7 +287,7 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg,
// After successful lock, push object on lock-stack.
movptr(Address(thread, top), obj);
addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
- jmpb(locked);
+ jmp(locked);
}
{ // Handle inflated monitor.
@@ -297,38 +298,49 @@ void C2_MacroAssembler::fast_lock(Register obj, Register box, Register rax_reg,
if (!UseObjectMonitorTable) {
assert(mark == monitor, "should be the same here");
} else {
- // Uses ObjectMonitorTable. Look for the monitor in the om_cache.
- // Fetch ObjectMonitor* from the cache or take the slow-path.
+ const Register hash = t;
Label monitor_found;
- // Load cache address
- lea(t, Address(thread, JavaThread::om_cache_oops_offset()));
+ // Look for the monitor in the om_cache.
- const int num_unrolled = 2;
+ ByteSize cache_offset = JavaThread::om_cache_oops_offset();
+ ByteSize monitor_offset = OMCache::oop_to_monitor_difference();
+ const int num_unrolled = OMCache::CAPACITY;
for (int i = 0; i < num_unrolled; i++) {
- cmpptr(obj, Address(t));
+ movptr(monitor, Address(thread, cache_offset + monitor_offset));
+ cmpptr(obj, Address(thread, cache_offset));
jccb(Assembler::equal, monitor_found);
- increment(t, in_bytes(OMCache::oop_to_oop_difference()));
+ cache_offset = cache_offset + OMCache::oop_to_oop_difference();
}
- Label loop;
+ // Look for the monitor in the table.
- // Search for obj in cache.
- bind(loop);
+ // Get the hash code.
+ movptr(hash, Address(obj, oopDesc::mark_offset_in_bytes()));
+ shrq(hash, markWord::hash_shift);
+ andq(hash, markWord::hash_mask);
- // Check for match.
- cmpptr(obj, Address(t));
- jccb(Assembler::equal, monitor_found);
+ // Get the table and calculate the bucket's address.
+ lea(rax_reg, ExternalAddress(ObjectMonitorTable::current_table_address()));
+ movptr(rax_reg, Address(rax_reg));
+ andq(hash, Address(rax_reg, ObjectMonitorTable::table_capacity_mask_offset()));
+ movptr(rax_reg, Address(rax_reg, ObjectMonitorTable::table_buckets_offset()));
- // Search until null encountered, guaranteed _null_sentinel at end.
- cmpptr(Address(t), 1);
- jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0
- increment(t, in_bytes(OMCache::oop_to_oop_difference()));
- jmpb(loop);
+ // Read the monitor from the bucket.
+ movptr(monitor, Address(rax_reg, hash, Address::times_ptr));
+
+ // Check if the monitor in the bucket is special (empty, tombstone or removed)
+ cmpptr(monitor, ObjectMonitorTable::SpecialPointerValues::below_is_special);
+ jcc(Assembler::below, slow_path);
+
+ // Check if object matches.
+ movptr(rax_reg, Address(monitor, ObjectMonitor::object_offset()));
+ BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs_asm->try_resolve_weak_handle_in_c2(this, rax_reg, slow_path);
+ cmpptr(rax_reg, obj);
+ jcc(Assembler::notEqual, slow_path);
- // Cache hit.
bind(monitor_found);
- movptr(monitor, Address(t, OMCache::oop_to_monitor_difference()));
}
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value));
const Address recursions_address(monitor, ObjectMonitor::recursions_offset() - monitor_tag);
@@ -487,14 +499,14 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t,
cmpl(top, in_bytes(JavaThread::lock_stack_base_offset()));
jcc(Assembler::below, check_done);
cmpptr(obj, Address(thread, top));
- jccb(Assembler::notEqual, inflated_check_lock_stack);
+ jcc(Assembler::notEqual, inflated_check_lock_stack);
stop("Fast Unlock lock on stack");
bind(check_done);
if (UseObjectMonitorTable) {
movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
}
testptr(mark, markWord::monitor_value);
- jccb(Assembler::notZero, inflated);
+ jcc(Assembler::notZero, inflated);
stop("Fast Unlock not monitor");
#endif
@@ -519,7 +531,7 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t,
// Check if recursive.
cmpptr(recursions_address, 0);
- jccb(Assembler::notZero, recursive);
+ jcc(Assembler::notZero, recursive);
// Set owner to null.
// Release to satisfy the JMM
@@ -530,11 +542,11 @@ void C2_MacroAssembler::fast_unlock(Register obj, Register reg_rax, Register t,
// Check if the entry_list is empty.
cmpptr(entry_list_address, NULL_WORD);
- jccb(Assembler::zero, unlocked); // If so we are done.
+ jcc(Assembler::zero, unlocked); // If so we are done.
// Check if there is a successor.
cmpptr(succ_address, NULL_WORD);
- jccb(Assembler::notZero, unlocked); // If so we are done.
+ jcc(Assembler::notZero, unlocked); // If so we are done.
// Save the monitor pointer in the current thread, so we can try to
// reacquire the lock in SharedRuntime::monitor_exit_helper().
@@ -1025,8 +1037,8 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
}
-void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
- XMMRegister src1, XMMRegister src2, int vlen_enc) {
+void C2_MacroAssembler::vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
+ XMMRegister src1, XMMRegister src2, int vlen_enc) {
assert(opc == Op_MinV || opc == Op_MinReductionV ||
opc == Op_MaxV || opc == Op_MaxReductionV, "sanity");
@@ -1040,23 +1052,49 @@ void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst,
}
}
+void C2_MacroAssembler::sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
+ XMMRegister src1, XMMRegister src2) {
+ assert(opc == Op_MinF || opc == Op_MaxF ||
+ opc == Op_MinD || opc == Op_MaxD, "sanity");
+
+ int imm8 = (opc == Op_MinF || opc == Op_MinD) ? AVX10_2_MINMAX_MIN_COMPARE_SIGN
+ : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
+ if (elem_bt == T_FLOAT) {
+ evminmaxss(dst, mask, src1, src2, true, imm8);
+ } else {
+ assert(elem_bt == T_DOUBLE, "");
+ evminmaxsd(dst, mask, src1, src2, true, imm8);
+ }
+}
+
// Float/Double signum
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
Label DONE_LABEL;
+ // Handle special cases +0.0/-0.0 and NaN, if argument is +0.0/-0.0 or NaN, return argument
+ // If AVX10.2 (or newer) floating point comparison instructions used, SF=1 for equal and unordered cases
+ // If other floating point comparison instructions used, ZF=1 for equal and unordered cases
if (opcode == Op_SignumF) {
- ucomiss(dst, zero);
- jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
- jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
+ if (VM_Version::supports_avx10_2()) {
+ evucomxss(dst, zero);
+ jcc(Assembler::negative, DONE_LABEL);
+ } else {
+ ucomiss(dst, zero);
+ jcc(Assembler::equal, DONE_LABEL);
+ }
movflt(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg);
} else if (opcode == Op_SignumD) {
- ucomisd(dst, zero);
- jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
- jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
+ if (VM_Version::supports_avx10_2()) {
+ evucomxsd(dst, zero);
+ jcc(Assembler::negative, DONE_LABEL);
+ } else {
+ ucomisd(dst, zero);
+ jcc(Assembler::equal, DONE_LABEL);
+ }
movdbl(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), noreg);
@@ -1668,12 +1706,8 @@ void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, Inte
}
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, int vlen_in_bytes, BasicType bt) {
- // The iota indices are ordered by type B/S/I/L/F/D, and the offset between two types is 64.
- int offset = exact_log2(type2aelembytes(bt)) << 6;
- if (is_floating_point_type(bt)) {
- offset += 128;
- }
- ExternalAddress addr(StubRoutines::x86::vector_iota_indices() + offset);
+ int entry_idx = vector_iota_entry_index(bt);
+ ExternalAddress addr(StubRoutines::x86::vector_iota_indices(entry_idx));
load_vector(T_BYTE, dst, addr, vlen_in_bytes);
}
@@ -1706,6 +1740,24 @@ void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegis
default: assert(false, "wrong type");
}
break;
+ case Op_UMinReductionV:
+ switch (typ) {
+ case T_BYTE: vpminub(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_SHORT: vpminuw(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_INT: vpminud(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_LONG: evpminuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_UMaxReductionV:
+ switch (typ) {
+ case T_BYTE: vpmaxub(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_SHORT: vpmaxuw(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_INT: vpmaxud(dst, dst, src, Assembler::AVX_128bit); break;
+ case T_LONG: evpmaxuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVF: addss(dst, src); break;
case Op_AddReductionVD: addsd(dst, src); break;
case Op_AddReductionVI:
@@ -1769,6 +1821,24 @@ void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegis
default: assert(false, "wrong type");
}
break;
+ case Op_UMinReductionV:
+ switch (typ) {
+ case T_BYTE: vpminub(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpminuw(dst, src1, src2, vector_len); break;
+ case T_INT: vpminud(dst, src1, src2, vector_len); break;
+ case T_LONG: evpminuq(dst, k0, src1, src2, true, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_UMaxReductionV:
+ switch (typ) {
+ case T_BYTE: vpmaxub(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpmaxuw(dst, src1, src2, vector_len); break;
+ case T_INT: vpmaxud(dst, src1, src2, vector_len); break;
+ case T_LONG: evpmaxuq(dst, k0, src1, src2, true, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVI:
switch (typ) {
case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
@@ -2035,7 +2105,11 @@ void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMReg
psrldq(vtmp2, 1);
reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
movdl(vtmp2, src1);
- pmovsxbd(vtmp1, vtmp1);
+ if (opcode == Op_UMinReductionV || opcode == Op_UMaxReductionV) {
+ pmovzxbd(vtmp1, vtmp1);
+ } else {
+ pmovsxbd(vtmp1, vtmp1);
+ }
reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
pextrb(dst, vtmp1, 0x0);
movsbl(dst, dst);
@@ -2072,8 +2146,8 @@ void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XM
} else {
pmovsxbw(vtmp2, src2);
reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
- pshufd(vtmp2, src2, 0x1);
- pmovsxbw(vtmp2, src2);
+ pshufd(vtmp2, src2, 0xe);
+ pmovsxbw(vtmp2, vtmp2);
reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
}
}
@@ -2082,7 +2156,7 @@ void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XM
if (UseAVX > 2 && VM_Version::supports_avx512bw()) {
int vector_len = Assembler::AVX_512bit;
vpmovsxbw(vtmp1, src2, vector_len);
- reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+ reduce32S(opcode, dst, src1, vtmp1, vtmp2, vtmp1);
} else {
assert(UseAVX >= 2,"Should not reach here.");
mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2);
@@ -2112,7 +2186,11 @@ void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMReg
reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2);
}
movdl(vtmp2, src1);
- pmovsxwd(vtmp1, vtmp1);
+ if (opcode == Op_UMinReductionV || opcode == Op_UMaxReductionV) {
+ pmovzxwd(vtmp1, vtmp1);
+ } else {
+ pmovsxwd(vtmp1, vtmp1);
+ }
reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
pextrw(dst, vtmp1, 0x0);
movswl(dst, dst);
@@ -2125,6 +2203,7 @@ void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMReg
}
phaddw(vtmp1, src2);
} else {
+ assert_different_registers(src2, vtmp1);
pshufd(vtmp1, src2, 0xE);
reduce_operation_128(T_SHORT, opcode, vtmp1, src2);
}
@@ -2137,6 +2216,7 @@ void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRe
vphaddw(vtmp2, src2, src2, vector_len);
vpermq(vtmp2, vtmp2, 0xD8, vector_len);
} else {
+ assert_different_registers(src2, vtmp2);
vextracti128_high(vtmp2, src2);
reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
}
@@ -2144,6 +2224,7 @@ void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRe
}
void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ assert_different_registers(src2, vtmp1);
int vector_len = Assembler::AVX_256bit;
vextracti64x4_high(vtmp1, src2);
reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2);
@@ -2333,7 +2414,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali
}
if (VM_Version::supports_avx10_2()) {
- vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc);
+ vminmax_fp_avx10_2(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc);
} else {
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
}
@@ -2342,7 +2423,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali
}
if (is_dst_valid) {
if (VM_Version::supports_avx10_2()) {
- vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit);
+ vminmax_fp_avx10_2(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit);
} else {
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
}
@@ -2373,7 +2454,7 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val
}
if (VM_Version::supports_avx10_2()) {
- vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc);
+ vminmax_fp_avx10_2(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc);
} else {
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
}
@@ -2384,7 +2465,7 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val
if (is_dst_valid) {
if (VM_Version::supports_avx10_2()) {
- vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit);
+ vminmax_fp_avx10_2(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit);
} else {
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
}
@@ -6994,13 +7075,25 @@ void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1,
}
}
-void C2_MacroAssembler::scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
- KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) {
- vector_max_min_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit);
+void C2_MacroAssembler::sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) {
+ vminmax_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit);
}
-void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
- KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) {
+void C2_MacroAssembler::sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp) {
+ if (opcode == Op_MaxHF) {
+ // dst = max(src1, src2)
+ evminmaxsh(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
+ } else {
+ assert(opcode == Op_MinHF, "");
+ // dst = min(src1, src2)
+ evminmaxsh(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
+ }
+}
+
+void C2_MacroAssembler::vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) {
if (opcode == Op_MaxVHF || opcode == Op_MaxHF) {
// Move sign bits of src2 to mask register.
evpmovw2m(ktmp, src2, vlen_enc);
@@ -7043,3 +7136,48 @@ void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegi
Assembler::evmovdquw(dst, ktmp, xtmp1, true, vlen_enc);
}
}
+
+void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, int vlen_enc) {
+ if (opcode == Op_MaxVHF) {
+ // dst = max(src1, src2)
+ evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vlen_enc);
+ } else {
+ assert(opcode == Op_MinVHF, "");
+ // dst = min(src1, src2)
+ evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN, vlen_enc);
+ }
+}
+
+void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
+ KRegister ktmp, int vlen_enc) {
+ if (opcode == Op_MaxVHF) {
+ // dst = max(src1, src2)
+ evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vlen_enc);
+ } else {
+ assert(opcode == Op_MinVHF, "");
+ // dst = min(src1, src2)
+ evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN, vlen_enc);
+ }
+}
+
+int C2_MacroAssembler::vector_iota_entry_index(BasicType bt) {
+ // The vector iota entries array is ordered by type B/S/I/L/F/D, and
+ // the offset between two types is 16.
+ switch(bt) {
+ case T_BYTE:
+ return 0;
+ case T_SHORT:
+ return 1;
+ case T_INT:
+ return 2;
+ case T_LONG:
+ return 3;
+ case T_FLOAT:
+ return 4;
+ case T_DOUBLE:
+ return 5;
+ default:
+ ShouldNotReachHere();
+ }
+}
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index 6d8b0ceaebe..9b229ad7221 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -67,8 +67,11 @@ public:
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
- void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
- XMMRegister src1, XMMRegister src2, int vlen_enc);
+ void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
+ XMMRegister src1, XMMRegister src2, int vlen_enc);
+
+ void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
+ XMMRegister src1, XMMRegister src2);
void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
@@ -576,12 +579,22 @@ public:
void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
- void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
- KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
+ void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
- void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
- KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
+ void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, int vlen_enc);
+
+ void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
+ KRegister ktmp, int vlen_enc);
+
+ void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
+
+ void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ KRegister ktmp);
void reconstruct_frame_pointer(Register rtmp);
+ int vector_iota_entry_index(BasicType bt);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp
index 3f616cb4578..11d8c03d0ca 100644
--- a/src/hotspot/cpu/x86/c2_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp
@@ -46,7 +46,6 @@ define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, InteriorEntryAlignment, 16);
-define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
// InitialCodeCacheSize derived from specjbb2000 run.
define_pd_global(size_t, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
@@ -74,7 +73,4 @@ define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86.
-// Ergonomics related flags
-define_pd_global(bool, NeverActAsServerClassMachine, false);
-
#endif // CPU_X86_C2_GLOBALS_X86_HPP
diff --git a/src/hotspot/cpu/x86/downcallLinker_x86_64.cpp b/src/hotspot/cpu/x86/downcallLinker_x86_64.cpp
index c48940198ea..e3bf5f17fe9 100644
--- a/src/hotspot/cpu/x86/downcallLinker_x86_64.cpp
+++ b/src/hotspot/cpu/x86/downcallLinker_x86_64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -145,10 +145,10 @@ void DowncallLinker::StubGenerator::generate() {
// when we don't use a return buffer we need to spill the return value around our slow path calls
bool should_save_return_value = !_needs_return_buffer;
RegSpiller out_reg_spiller(_output_registers);
- int spill_rsp_offset = -1;
+ int out_spill_rsp_offset = -1;
if (should_save_return_value) {
- spill_rsp_offset = 0;
+ out_spill_rsp_offset = 0;
// spill area can be shared with shadow space and out args,
// since they are only used before the call,
// and spill area is only used after.
@@ -173,6 +173,9 @@ void DowncallLinker::StubGenerator::generate() {
// FP-> | |
// |---------------------| = frame_bottom_offset = frame_size
// | (optional) |
+ // | in_reg_spiller area |
+ // |---------------------|
+ // | (optional) |
// | capture state buf |
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
// | (optional) |
@@ -188,6 +191,18 @@ void DowncallLinker::StubGenerator::generate() {
VMStorage shuffle_reg = as_VMStorage(rbx);
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
+ // Need to spill for state capturing runtime call.
+ // The area spilled into is distinct from the capture state buffer.
+ RegSpiller in_reg_spiller(out_regs);
+ int in_spill_rsp_offset = -1;
+ if (_captured_state_mask != 0) {
+ // The spill area cannot be shared with the shadow/out args space
+ // since spilling needs to happen before the call. Allocate a new
+ // region in the stack for this spill space.
+ in_spill_rsp_offset = allocated_frame_size;
+ allocated_frame_size += in_reg_spiller.spill_size_bytes();
+ }
+
#ifndef PRODUCT
LogTarget(Trace, foreign, downcall) lt;
if (lt.is_enabled()) {
@@ -232,6 +247,19 @@ void DowncallLinker::StubGenerator::generate() {
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
__ block_comment("} argument shuffle");
+ if (_captured_state_mask != 0) {
+ assert(in_spill_rsp_offset != -1, "must be");
+ __ block_comment("{ load initial thread local");
+ in_reg_spiller.generate_spill(_masm, in_spill_rsp_offset);
+
+ // Copy the contents of the capture state buffer into thread local
+ __ movptr(c_rarg0, Address(rsp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
+ __ movl(c_rarg1, _captured_state_mask);
+ runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre));
+
+ in_reg_spiller.generate_fill(_masm, in_spill_rsp_offset);
+ __ block_comment("} load initial thread local");
+ }
__ call(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
assert(!_abi.is_volatile_reg(r15_thread), "Call assumed not to kill r15");
@@ -258,15 +286,15 @@ void DowncallLinker::StubGenerator::generate() {
__ block_comment("{ save thread local");
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
}
__ movptr(c_rarg0, Address(rsp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
__ movl(c_rarg1, _captured_state_mask);
- runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state));
+ runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
}
__ block_comment("} save thread local");
@@ -319,14 +347,14 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_safepoint_poll_slow_path);
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
}
__ mov(c_rarg0, r15_thread);
runtime_call(_masm, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
}
__ jmp(L_after_safepoint_poll);
@@ -338,13 +366,13 @@ void DowncallLinker::StubGenerator::generate() {
__ bind(L_reguard);
if (should_save_return_value) {
- out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
}
runtime_call(_masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
if (should_save_return_value) {
- out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
+ out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
}
__ jmp(L_after_reguard);
diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp
index dcd766545d3..3f3b951edc8 100644
--- a/src/hotspot/cpu/x86/frame_x86.inline.hpp
+++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -231,8 +231,8 @@ inline bool frame::equal(frame other) const {
// Return unique id for this frame. The id must have a value where we can distinguish
// identity and younger/older relationship. null represents an invalid (incomparable)
-// frame.
-inline intptr_t* frame::id(void) const { return unextended_sp(); }
+// frame. Should not be called for heap frames.
+inline intptr_t* frame::id(void) const { return real_fp(); }
// Return true if the frame is older (less recent activation) than the frame represented by id
inline bool frame::is_older(intptr_t* id) const { assert(this->id() != nullptr && id != nullptr, "null frame id");
@@ -397,6 +397,9 @@ inline frame frame::sender(RegisterMap* map) const {
StackWatermarkSet::on_iteration(map->thread(), result);
}
+ // Calling frame::id() is currently not supported for heap frames.
+ assert(result._on_heap || this->_on_heap || result.is_older(this->id()), "Must be");
+
return result;
}
diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
index 34de9403ccf..b20d7b5cd07 100644
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
@@ -23,6 +23,7 @@
*/
#include "asm/macroAssembler.inline.hpp"
+#include "code/aotCodeCache.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
@@ -268,6 +269,16 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ bind(done);
}
+#if INCLUDE_CDS
+// return a register that differs from reg1, reg2, reg3 and reg4
+
+static Register pick_different_reg(Register reg1, Register reg2 = noreg, Register reg3= noreg, Register reg4 = noreg) {
+ RegSet available = (RegSet::of(rscratch1, rscratch2, rax, rbx) + rdx -
+ RegSet::of(reg1, reg2, reg3, reg4));
+ return *(available.begin());
+}
+#endif // INCLUDE_CDS
+
static void generate_post_barrier(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
@@ -280,10 +291,32 @@ static void generate_post_barrier(MacroAssembler* masm,
Label L_done;
// Does store cross heap regions?
- __ movptr(tmp1, store_addr); // tmp1 := store address
- __ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
- __ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
- __ jccb(Assembler::equal, L_done);
+#if INCLUDE_CDS
+ // AOT code needs to load the barrier grain shift from the aot
+ // runtime constants area in the code cache otherwise we can compile
+ // it as an immediate operand
+
+ if (AOTCodeCache::is_on_for_dump()) {
+ address grain_shift_addr = AOTRuntimeConstants::grain_shift_address();
+ Register save = pick_different_reg(rcx, tmp1, new_val, store_addr);
+ __ push(save);
+ __ movptr(save, store_addr);
+ __ xorptr(save, new_val);
+ __ push(rcx);
+ __ lea(rcx, ExternalAddress(grain_shift_addr));
+ __ movl(rcx, Address(rcx, 0));
+ __ shrptr(save);
+ __ pop(rcx);
+ __ pop(save);
+ __ jcc(Assembler::equal, L_done);
+ } else
+#endif // INCLUDE_CDS
+ {
+ __ movptr(tmp1, store_addr); // tmp1 := store address
+ __ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
+ __ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
+ __ jccb(Assembler::equal, L_done);
+ }
// Crosses regions, storing null?
if (new_val_may_be_null) {
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
index 09c5d93dbb3..215dc30f7fd 100644
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -395,6 +395,11 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
+void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath) {
+ // Load the oop from the weak handle.
+ __ movptr(obj, Address(obj));
+}
+
#undef __
#define __ _masm->
diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
index c5bf17c3b4e..6aff29850e3 100644
--- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -109,6 +109,8 @@ public:
#ifdef COMPILER2
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);
+
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath);
#endif // COMPILER2
};
diff --git a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
index 2b91662ddb5..c05f37a3bea 100644
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
*/
#include "asm/macroAssembler.inline.hpp"
+#include "code/aotCodeCache.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
@@ -95,11 +96,7 @@ void CardTableBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet d
void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
- BarrierSet *bs = BarrierSet::barrier_set();
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
- intptr_t disp = (intptr_t) ct->byte_map_base();
- SHENANDOAHGC_ONLY(assert(!UseShenandoahGC, "Shenandoah byte_map_base is not constant.");)
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
Label L_loop, L_done;
const Register end = count;
@@ -115,7 +112,15 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl
__ shrptr(end, CardTable::card_shift());
__ subptr(end, addr); // end --> cards count
- __ mov64(tmp, disp);
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ __ lea(tmp, ExternalAddress(AOTRuntimeConstants::card_table_base_address()));
+ __ movq(tmp, Address(tmp, 0));
+ } else
+#endif
+ {
+ __ mov64(tmp, (intptr_t)ctbs->card_table_base_const());
+ }
__ addptr(addr, tmp);
__ BIND(L_loop);
__ movb(Address(addr, count, Address::times_1), 0);
@@ -125,24 +130,30 @@ __ BIND(L_loop);
__ BIND(L_done);
}
-void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register rscratch) {
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.
- BarrierSet* bs = BarrierSet::barrier_set();
-
- CardTableBarrierSet* ctbs = barrier_set_cast(bs);
- CardTable* ct = ctbs->card_table();
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
__ shrptr(obj, CardTable::card_shift());
Address card_addr;
+ precond(rscratch != noreg);
+ assert_different_registers(obj, rscratch);
// The calculation for byte_map_base is as follows:
// byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
// So this essentially converts an address to a displacement and it will
// never need to be relocated. On 64bit however the value may be too
// large for a 32bit displacement.
- intptr_t byte_map_base = (intptr_t)ct->byte_map_base();
+ intptr_t byte_map_base = (intptr_t)ctbs->card_table_base_const();
+#if INCLUDE_CDS
+ if (AOTCodeCache::is_on_for_dump()) {
+ __ lea(rscratch, ExternalAddress(AOTRuntimeConstants::card_table_base_address()));
+ __ movq(rscratch, Address(rscratch, 0));
+ card_addr = Address(rscratch, obj, Address::times_1, 0);
+ } else
+#endif
if (__ is_simm32(byte_map_base)) {
card_addr = Address(noreg, obj, Address::times_1, byte_map_base);
} else {
@@ -152,7 +163,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob
// entry and that entry is not properly handled by the relocation code.
AddressLiteral cardtable((address)byte_map_base, relocInfo::none);
Address index(noreg, obj, Address::times_1);
- card_addr = __ as_Address(ArrayAddress(cardtable, index), rscratch1);
+ card_addr = __ as_Address(ArrayAddress(cardtable, index), rscratch);
}
int dirty = CardTable::dirty_card_val();
@@ -181,10 +192,10 @@ void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorS
if (needs_post_barrier) {
// flatten object address if needed
if (!precise || (dst.index() == noreg && dst.disp() == 0)) {
- store_check(masm, dst.base(), dst);
+ store_check(masm, dst.base(), tmp2);
} else {
__ lea(tmp1, dst);
- store_check(masm, tmp1, dst);
+ store_check(masm, tmp1, tmp2);
}
}
}
diff --git a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
index 0a36571c757..c38e16d4d5f 100644
--- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,7 +33,7 @@ protected:
virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count) {}
- void store_check(MacroAssembler* masm, Register obj, Address dst);
+ void store_check(MacroAssembler* masm, Register obj, Register rscratch);
virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp);
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 9e321391f6c..67510fac58f 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -174,24 +175,14 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec
}
}
-void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register tmp,
- bool tosca_live,
- bool expand_call) {
+void ShenandoahBarrierSetAssembler::satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call) {
+ assert(ShenandoahSATBBarrier, "Should be checked by caller");
- if (ShenandoahSATBBarrier) {
- satb_write_barrier_pre(masm, obj, pre_val, tmp, tosca_live, expand_call);
- }
-}
-
-void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register tmp,
- bool tosca_live,
- bool expand_call) {
// If expand_call is true then we expand the call_VM_leaf macro
// directly to skip generating the check by
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
@@ -533,18 +524,18 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
assert_different_registers(dst, tmp1, r15_thread);
// Generate the SATB pre-barrier code to log the value of
// the referent field in an SATB buffer.
- shenandoah_write_barrier_pre(masm /* masm */,
- noreg /* obj */,
- dst /* pre_val */,
- tmp1 /* tmp */,
- true /* tosca_live */,
- true /* expand_call */);
+ satb_barrier(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ tmp1 /* tmp */,
+ true /* tosca_live */,
+ true /* expand_call */);
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
}
}
-void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj) {
+void ShenandoahBarrierSetAssembler::card_barrier(MacroAssembler* masm, Register obj) {
assert(ShenandoahCardBarrier, "Should have been checked by caller");
// Does a store check for the oop in register obj. The content of
@@ -575,41 +566,40 @@ void ShenandoahBarrierSetAssembler::store_check(MacroAssembler* masm, Register o
void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
- bool on_oop = is_reference_type(type);
- bool in_heap = (decorators & IN_HEAP) != 0;
- bool as_normal = (decorators & AS_NORMAL) != 0;
- if (on_oop && in_heap) {
- bool needs_pre_barrier = as_normal;
+ // 1: non-reference types require no barriers
+ if (!is_reference_type(type)) {
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+ return;
+ }
- // flatten object address if needed
- // We do it regardless of precise because we need the registers
- if (dst.index() == noreg && dst.disp() == 0) {
- if (dst.base() != tmp1) {
- __ movptr(tmp1, dst.base());
- }
- } else {
- __ lea(tmp1, dst);
- }
-
- assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
-
- if (needs_pre_barrier) {
- shenandoah_write_barrier_pre(masm /*masm*/,
- tmp1 /* obj */,
- tmp2 /* pre_val */,
- tmp3 /* tmp */,
- val != noreg /* tosca_live */,
- false /* expand_call */);
- }
-
- BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
- if (val != noreg) {
- if (ShenandoahCardBarrier) {
- store_check(masm, tmp1);
- }
+ // Flatten object address right away for simplicity: likely needed by barriers
+ assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread);
+ if (dst.index() == noreg && dst.disp() == 0) {
+ if (dst.base() != tmp1) {
+ __ movptr(tmp1, dst.base());
}
} else {
- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
+ __ lea(tmp1, dst);
+ }
+
+ bool storing_non_null = (val != noreg);
+
+ // 2: pre-barrier: SATB needs the previous value
+ if (ShenandoahBarrierSet::need_satb_barrier(decorators, type)) {
+ satb_barrier(masm,
+ tmp1 /* obj */,
+ tmp2 /* pre_val */,
+ tmp3 /* tmp */,
+ storing_non_null /* tosca_live */,
+ false /* expand_call */);
+ }
+
+ // Store!
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg);
+
+ // 3: post-barrier: card barrier needs store address
+ if (ShenandoahBarrierSet::need_card_barrier(decorators, type) && storing_non_null) {
+ card_barrier(masm, tmp1);
}
}
@@ -629,6 +619,27 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
__ bind(done);
}
+#ifdef COMPILER2
+void ShenandoahBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath) {
+ Label done;
+
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, slowpath);
+
+ // Check if the reference is null, and if it is, take the fast path.
+ __ testptr(obj, obj);
+ __ jcc(Assembler::zero, done);
+
+ Address gc_state(r15_thread, ShenandoahThreadLocalData::gc_state_offset());
+
+ // Check if the heap is under weak-reference/roots processing, in
+ // which case we need to take the slow path.
+ __ testb(gc_state, ShenandoahHeap::WEAK_ROOTS);
+ __ jcc(Assembler::notZero, slowpath);
+ __ bind(done);
+}
+#endif // COMPILER2
+
// Special Shenandoah CAS implementation that handles false negatives
// due to concurrent evacuation.
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index b0185f2dbff..79540aa19e1 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -41,21 +42,14 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- void satb_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register tmp,
- bool tosca_live,
- bool expand_call);
+ void satb_barrier(MacroAssembler* masm,
+ Register obj,
+ Register pre_val,
+ Register tmp,
+ bool tosca_live,
+ bool expand_call);
- void shenandoah_write_barrier_pre(MacroAssembler* masm,
- Register obj,
- Register pre_val,
- Register tmp,
- bool tosca_live,
- bool expand_call);
-
- void store_check(MacroAssembler* masm, Register obj);
+ void card_barrier(MacroAssembler* masm, Register obj);
void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count,
@@ -84,6 +78,9 @@ public:
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
+#ifdef COMPILER2
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slowpath);
+#endif // COMPILER2
};
#endif // CPU_X86_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
index ae93cca8c19..c20551b5084 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "gc/z/zThreadLocalData.hpp"
+#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -1328,6 +1329,19 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
__ jmp(slow_continuation);
}
+void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slow_path) {
+ // Resolve weak handle using the standard implementation.
+ BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, slow_path);
+
+ // Check if the oop is bad, in which case we need to take the slow path.
+ __ testptr(obj, Address(r15_thread, ZThreadLocalData::mark_bad_mask_offset()));
+ __ jcc(Assembler::notZero, slow_path);
+
+ // Oop is okay, so we uncolor it.
+ __ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatLoadGoodBeforeShl);
+ __ shrq(obj, barrier_Relocation::unpatched);
+}
+
#undef __
#endif // COMPILER2
@@ -1378,10 +1392,13 @@ static uint16_t patch_barrier_relocation_value(int format) {
}
}
-void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
+void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format, bool log) {
const int offset = patch_barrier_relocation_offset(format);
const uint16_t value = patch_barrier_relocation_value(format);
uint8_t* const patch_addr = (uint8_t*)addr + offset;
+ if (log) {
+ log_trace(aot, codecache, stubs)("patching address " INTPTR_FORMAT " offset %d value 0x%x", p2i(addr), offset, value);
+ }
if (format == ZBarrierRelocationFormatLoadGoodBeforeShl) {
if (VM_Version::supports_apx_f()) {
NativeInstruction* instruction = nativeInstruction_at(addr);
@@ -1413,6 +1430,74 @@ void ZBarrierSetAssembler::patch_barriers() {
#undef __
#define __ masm->
+void ZBarrierSetAssembler::register_reloc_addresses(GrowableArray<address>& entries, int begin, int count) {
+ int formats[] = {
+ ZBarrierRelocationFormatLoadBadAfterTest,
+ ZBarrierRelocationFormatStoreBadAfterTest,
+ ZBarrierRelocationFormatStoreGoodAfterOr,
+ -1
+ };
+ int format_idx = 0;
+ int format = formats[format_idx++];
+ for (int i = begin; i < begin + count; i++) {
+ address addr = entries.at(i);
+ // reloc addresses occur in 3 groups terminated with a nullptr
+ if (addr == nullptr) {
+ assert(format_idx < (int)(sizeof(formats) / sizeof(formats[0])),
+ "too many reloc groups");
+ format = formats[format_idx++];
+ } else {
+ switch(format) {
+ case ZBarrierRelocationFormatLoadBadAfterTest:
+ _load_bad_relocations.append(addr);
+ break;
+ case ZBarrierRelocationFormatStoreBadAfterTest:
+ _store_bad_relocations.append(addr);
+ break;
+ case ZBarrierRelocationFormatStoreGoodAfterOr:
+ _store_good_relocations.append(addr);
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+ patch_barrier_relocation(addr, format, true);
+ }
+ }
+ assert(format == -1, "unterminated format list");
+}
+
+void ZBarrierSetAssembler::retrieve_reloc_addresses(address start, address end, GrowableArray<address>& entries) {
+ assert(start != nullptr, "start address must not be null");
+ assert(end != nullptr, "end address must not be null");
+ assert(start < end, "stub range must not be empty");
+ for (int i = 0; i < _load_bad_relocations.length(); i++) {
+ address addr = _load_bad_relocations.at(i);
+ assert(addr != nullptr, "load bad reloc address should not be null!");
+ if (start <= addr && addr < end) {
+ entries.append(addr);
+ }
+ }
+ entries.append(nullptr);
+ for (int i = 0; i < _store_bad_relocations.length(); i++) {
+ address addr = _store_bad_relocations.at(i);
+ assert(addr != nullptr, "store bad reloc address should not be null!");
+ if (start <= addr && addr < end) {
+ entries.append(addr);
+ }
+ }
+ entries.append(nullptr);
+ for (int i = 0; i < _store_good_relocations.length(); i++) {
+ address addr = _store_good_relocations.at(i);
+ assert(addr != nullptr, "store good reloc address should not be null!");
+ if (start <= addr && addr < end) {
+ entries.append(addr);
+ }
+ }
+ entries.append(nullptr);
+}
+
+
void ZBarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
// C1 calls verfy_oop in the middle of barriers, before they have been uncolored
// and after being colored. Therefore, we must deal with colored oops as well.
diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
index 19902500f93..ce0c4769716 100644
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -167,6 +167,8 @@ public:
ZLoadBarrierStubC2* stub) const;
void generate_c2_store_barrier_stub(MacroAssembler* masm,
ZStoreBarrierStubC2* stub) const;
+
+ virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Label& slow_path);
#endif // COMPILER2
void store_barrier_fast(MacroAssembler* masm,
@@ -187,10 +189,14 @@ public:
Label& slow_path,
Label& slow_path_continuation) const;
- void patch_barrier_relocation(address addr, int format);
+ void patch_barrier_relocation(address addr, int format, bool log = false);
void patch_barriers();
+ void register_reloc_addresses(GrowableArray<address>& entries, int begin, int count);
+
+ void retrieve_reloc_addresses(address start, address end, GrowableArray<address>& entries);
+
void check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error);
};
diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp
index 103e22d0185..6de46752790 100644
--- a/src/hotspot/cpu/x86/globals_x86.hpp
+++ b/src/hotspot/cpu/x86/globals_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -46,9 +46,9 @@ define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRES
// the uep and the vep doesn't get real alignment but just slops on by
// only assured that the entry instruction meets the 5 byte size requirement.
#if COMPILER2_OR_JVMCI
-define_pd_global(intx, CodeEntryAlignment, 32);
+define_pd_global(uint, CodeEntryAlignment, 32);
#else
-define_pd_global(intx, CodeEntryAlignment, 16);
+define_pd_global(uint, CodeEntryAlignment, 16);
#endif // COMPILER2_OR_JVMCI
define_pd_global(intx, OptoLoopAlignment, 16);
define_pd_global(intx, InlineSmallCode, 1000);
@@ -117,9 +117,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseIncDec, true, DIAGNOSTIC, \
"Use INC, DEC instructions on x86") \
\
- product(bool, UseNewLongLShift, false, \
- "Use optimized bitwise shift left") \
- \
product(bool, UseAddressNop, false, \
"Use '0F 1F [addr]' NOP instructions on x86 cpus") \
\
@@ -168,16 +165,27 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Perform Ecore Optimization") \
\
/* Minimum array size in bytes to use AVX512 intrinsics */ \
- /* for copy, inflate and fill which don't bail out early based on any */ \
+ /* for inflate and fill which don't bail out early based on any */ \
/* condition. When this value is set to zero compare operations like */ \
/* compare, vectorizedMismatch, compress can also use AVX512 intrinsics.*/\
product(int, AVX3Threshold, 4096, DIAGNOSTIC, \
"Minimum array size in bytes to use AVX512 intrinsics" \
- "for copy, inflate and fill. When this value is set as zero" \
+ "for inflate and fill. When this value is set as zero" \
"compare operations can also use AVX512 intrinsics.") \
range(0, max_jint) \
constraint(AVX3ThresholdConstraintFunc,AfterErgo) \
\
+ /* Minimum array size in bytes to use AVX512 intrinsics */ \
+ /* for copy and fill which don't bail out early based on any */ \
+ /* condition. When this value is set to zero clear operations that */ \
+ /* work on memory blocks can also use AVX512 intrinsics. */ \
+ product(int, CopyAVX3Threshold, 4096, DIAGNOSTIC, \
+ "Minimum array size in bytes to use AVX512 intrinsics" \
+ "for copy and fill. When this value is set as zero" \
+ "clear operations can also use AVX512 intrinsics.") \
+ range(0, max_jint) \
+ constraint(CopyAVX3ThresholdConstraintFunc,AfterErgo) \
+ \
product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC, \
"Turn off JVM mitigations related to Intel micro code " \
"mitigations for the Intel JCC erratum") \
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp
index b2ea4143ac4..a38971c86fb 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1392,28 +1392,15 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
- Register mdp,
- bool receiver_can_be_null) {
+ Register mdp) {
if (ProfileInterpreter) {
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
- Label skip_receiver_profile;
- if (receiver_can_be_null) {
- Label not_null;
- testptr(receiver, receiver);
- jccb(Assembler::notZero, not_null);
- // We are making a call. Increment the count for null receiver.
- increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
- jmp(skip_receiver_profile);
- bind(not_null);
- }
-
// Record the receiver type.
profile_receiver_type(receiver, mdp, 0);
- bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp
index 4114028f78e..dfbd7ab64e0 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -243,8 +243,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_not_taken_branch(Register mdp);
void profile_call(Register mdp);
void profile_final_call(Register mdp);
- void profile_virtual_call(Register receiver, Register mdp,
- bool receiver_can_be_null = false);
+ void profile_virtual_call(Register receiver, Register mdp);
void profile_ret(Register return_bci, Register mdp);
void profile_null_seen(Register mdp);
void profile_typecheck(Register mdp, Register klass);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 7f7bb2c4c7f..5ab3ca339aa 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -385,7 +385,8 @@ void MacroAssembler::warn(const char* msg) {
// Windows always allocates space for its register args
subq(rsp, frame::arg_reg_save_area_bytes);
#endif
- lea(c_rarg0, ExternalAddress((address) msg));
+ const char* str = (code_section()->scratch_emit()) ? msg : AOTCodeCache::add_C_string(msg);
+ lea(c_rarg0, ExternalAddress((address) str));
call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
#ifdef _WIN64
@@ -765,7 +766,7 @@ void MacroAssembler::align32() {
void MacroAssembler::align(uint modulus) {
// 8273459: Ensure alignment is possible with current segment alignment
- assert(modulus <= (uintx)CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment");
+ assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment");
align(modulus, offset());
}
@@ -961,7 +962,7 @@ void MacroAssembler::call(AddressLiteral entry, Register rscratch) {
void MacroAssembler::ic_call(address entry, jint method_index) {
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
// Needs full 64-bit immediate for later patching.
- mov64(rax, (int64_t)Universe::non_oop_word());
+ Assembler::mov64(rax, (int64_t)Universe::non_oop_word());
call(AddressLiteral(entry, rh));
}
@@ -985,12 +986,9 @@ int MacroAssembler::ic_check(int end_alignment) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(temp, receiver);
cmpl(temp, Address(data, CompiledICData::speculated_klass_offset()));
- } else if (UseCompressedClassPointers) {
+ } else {
movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
cmpl(temp, Address(data, CompiledICData::speculated_klass_offset()));
- } else {
- movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
- cmpptr(temp, Address(data, CompiledICData::speculated_klass_offset()));
}
// if inline cache check fails, then jump to runtime routine
@@ -1961,6 +1959,30 @@ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src, Register rscrat
}
}
+void MacroAssembler::movhlf(XMMRegister dst, XMMRegister src, Register rscratch) {
+ if (VM_Version::supports_avx10_2()) {
+ evmovw(dst, src);
+ } else {
+ assert(rscratch != noreg, "missing");
+ evmovw(rscratch, src);
+ evmovw(dst, rscratch);
+ }
+}
+
+void MacroAssembler::mov64(Register dst, int64_t imm64) {
+ if (is_uimm32(imm64)) {
+ movl(dst, checked_cast<uint32_t>(imm64));
+ } else if (is_simm32(imm64)) {
+ movq(dst, checked_cast<int32_t>(imm64));
+ } else {
+ Assembler::mov64(dst, imm64);
+ }
+}
+
+void MacroAssembler::mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format) {
+ Assembler::mov64(dst, imm64, rtype, format);
+}
+
void MacroAssembler::movptr(Register dst, Register src) {
movq(dst, src);
}
@@ -1971,13 +1993,7 @@ void MacroAssembler::movptr(Register dst, Address src) {
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
- if (is_uimm32(src)) {
- movl(dst, checked_cast(src));
- } else if (is_simm32(src)) {
- movq(dst, checked_cast(src));
- } else {
- mov64(dst, src);
- }
+ mov64(dst, src);
}
void MacroAssembler::movptr(Address dst, Register src) {
@@ -2656,6 +2672,17 @@ void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src, Register rscra
}
}
+void MacroAssembler::evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
+ assert(rscratch != noreg || always_reachable(src), "missing");
+
+ if (reachable(src)) {
+ Assembler::evucomxsd(dst, as_Address(src));
+ } else {
+ lea(rscratch, src);
+ Assembler::evucomxsd(dst, Address(rscratch, 0));
+ }
+}
+
void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
@@ -2667,6 +2694,39 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src, Register rscra
}
}
+void MacroAssembler::evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch) {
+ assert(rscratch != noreg || always_reachable(src), "missing");
+
+ if (reachable(src)) {
+ Assembler::evucomxss(dst, as_Address(src));
+ } else {
+ lea(rscratch, src);
+ Assembler::evucomxss(dst, Address(rscratch, 0));
+ }
+}
+
+void MacroAssembler::evucomish(XMMRegister dst, AddressLiteral src, Register rscratch) {
+ assert(rscratch != noreg || always_reachable(src), "missing");
+
+ if (reachable(src)) {
+ Assembler::evucomish(dst, as_Address(src));
+ } else {
+ lea(rscratch, src);
+ Assembler::evucomish(dst, Address(rscratch, 0));
+ }
+}
+
+void MacroAssembler::evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch) {
+ assert(rscratch != noreg || always_reachable(src), "missing");
+
+ if (reachable(src)) {
+ Assembler::evucomxsh(dst, as_Address(src));
+ } else {
+ lea(rscratch, src);
+ Assembler::evucomxsh(dst, Address(rscratch, 0));
+ }
+}
+
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register rscratch) {
assert(rscratch != noreg || always_reachable(src), "missing");
@@ -5354,11 +5414,9 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
if (UseCompactObjectHeaders) {
load_narrow_klass_compact(dst, src);
decode_klass_not_null(dst, tmp);
- } else if (UseCompressedClassPointers) {
+ } else {
movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
decode_klass_not_null(dst, tmp);
- } else {
- movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
}
@@ -5366,12 +5424,8 @@ void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
assert(!UseCompactObjectHeaders, "not with compact headers");
assert_different_registers(src, tmp);
assert_different_registers(dst, tmp);
- if (UseCompressedClassPointers) {
- encode_klass_not_null(src, tmp);
- movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
- } else {
- movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
- }
+ encode_klass_not_null(src, tmp);
+ movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
@@ -5380,10 +5434,8 @@ void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) {
assert_different_registers(klass, obj, tmp);
load_narrow_klass_compact(tmp, obj);
cmpl(klass, tmp);
- } else if (UseCompressedClassPointers) {
- cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
} else {
- cmpptr(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+ cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
}
}
@@ -5394,12 +5446,9 @@ void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Regi
load_narrow_klass_compact(tmp1, obj1);
load_narrow_klass_compact(tmp2, obj2);
cmpl(tmp1, tmp2);
- } else if (UseCompressedClassPointers) {
+ } else {
movl(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
cmpl(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes()));
- } else {
- movptr(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes()));
- cmpptr(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes()));
}
}
@@ -5448,10 +5497,8 @@ void MacroAssembler::store_heap_oop_null(Address dst) {
void MacroAssembler::store_klass_gap(Register dst, Register src) {
assert(!UseCompactObjectHeaders, "Don't use with compact headers");
- if (UseCompressedClassPointers) {
- // Store to klass gap in destination
- movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
- }
+ // Store to klass gap in destination
+ movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
}
#ifdef ASSERT
@@ -5626,7 +5673,12 @@ void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src)
BLOCK_COMMENT("encode_and_move_klass_not_null {");
assert_different_registers(src, dst);
if (CompressedKlassPointers::base() != nullptr) {
- movptr(dst, -(intptr_t)CompressedKlassPointers::base());
+ if (AOTCodeCache::is_on_for_dump()) {
+ movptr(dst, ExternalAddress(CompressedKlassPointers::base_addr()));
+ negq(dst);
+ } else {
+ movptr(dst, -(intptr_t)CompressedKlassPointers::base());
+ }
addq(dst, src);
} else {
movptr(dst, src);
@@ -5641,7 +5693,6 @@ void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
BLOCK_COMMENT("decode_klass_not_null {");
assert_different_registers(r, tmp);
// Note: it will change flags
- assert(UseCompressedClassPointers, "should only be used for compressed headers");
// Cannot assert, unverified entry point counts instructions (see .ad file)
// vtableStubs also counts instructions in pd_code_size_limit.
// Also do not verify_oop as this is called by verify_oop.
@@ -5663,7 +5714,6 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
BLOCK_COMMENT("decode_and_move_klass_not_null {");
assert_different_registers(src, dst);
// Note: it will change flags
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
// Cannot assert, unverified entry point counts instructions (see .ad file)
// vtableStubs also counts instructions in pd_code_size_limit.
// Also do not verify_oop as this is called by verify_oop.
@@ -5676,7 +5726,11 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
} else {
if (CompressedKlassPointers::shift() <= Address::times_8) {
if (CompressedKlassPointers::base() != nullptr) {
- movptr(dst, (intptr_t)CompressedKlassPointers::base());
+ if (AOTCodeCache::is_on_for_dump()) {
+ movptr(dst, ExternalAddress(CompressedKlassPointers::base_addr()));
+ } else {
+ movptr(dst, (intptr_t)CompressedKlassPointers::base());
+ }
} else {
xorq(dst, dst);
}
@@ -5688,9 +5742,14 @@ void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src)
}
} else {
if (CompressedKlassPointers::base() != nullptr) {
- const intptr_t base_right_shifted =
- (intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
- movptr(dst, base_right_shifted);
+ if (AOTCodeCache::is_on_for_dump()) {
+ movptr(dst, ExternalAddress(CompressedKlassPointers::base_addr()));
+ shrq(dst, CompressedKlassPointers::shift());
+ } else {
+ const intptr_t base_right_shifted =
+ (intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift();
+ movptr(dst, base_right_shifted);
+ }
} else {
xorq(dst, dst);
}
@@ -5720,7 +5779,6 @@ void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
}
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int klass_index = oop_recorder()->find_index(k);
RelocationHolder rspec = metadata_Relocation::spec(klass_index);
@@ -5728,7 +5786,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
}
void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int klass_index = oop_recorder()->find_index(k);
RelocationHolder rspec = metadata_Relocation::spec(klass_index);
@@ -5754,7 +5811,6 @@ void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
}
void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int klass_index = oop_recorder()->find_index(k);
RelocationHolder rspec = metadata_Relocation::spec(klass_index);
@@ -5762,7 +5818,6 @@ void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
}
void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
- assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int klass_index = oop_recorder()->find_index(k);
RelocationHolder rspec = metadata_Relocation::spec(klass_index);
@@ -5771,7 +5826,7 @@ void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
void MacroAssembler::reinit_heapbase() {
if (UseCompressedOops) {
- if (Universe::heap() != nullptr) {
+ if (Universe::heap() != nullptr && !AOTCodeCache::is_on_for_dump()) {
if (CompressedOops::base() == nullptr) {
MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
} else {
@@ -5790,7 +5845,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
- bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
+ bool use64byteVector = (MaxVectorSize == 64) && (CopyAVX3Threshold == 0);
if (use64byteVector) {
vpxor(xtmp, xtmp, xtmp, AVX_512bit);
} else if (MaxVectorSize >= 32) {
@@ -5854,7 +5909,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
// Clearing constant sized memory using YMM/ZMM registers.
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
assert(UseAVX > 2 && VM_Version::supports_avx512vl(), "");
- bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
+ bool use64byteVector = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
int vector64_count = (cnt & (~0x7)) >> 3;
cnt = cnt & 0x7;
@@ -6079,8 +6134,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
// Fill 64-byte chunks
Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;
- // If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2
- cmpptr(count, VM_Version::avx3_threshold());
+ // If number of bytes to fill < CopyAVX3Threshold, perform fill using AVX2
+ cmpptr(count, CopyAVX3Threshold);
jccb(Assembler::below, L_check_fill_64_bytes_avx2);
vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
@@ -6251,32 +6306,46 @@ void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src,
}
}
-// encode char[] to byte[] in ISO_8859_1 or ASCII
- //@IntrinsicCandidate
- //private static int implEncodeISOArray(byte[] sa, int sp,
- //byte[] da, int dp, int len) {
- // int i = 0;
- // for (; i < len; i++) {
- // char c = StringUTF16.getChar(sa, sp++);
- // if (c > '\u00FF')
- // break;
- // da[dp++] = (byte)c;
- // }
- // return i;
- //}
- //
- //@IntrinsicCandidate
- //private static int implEncodeAsciiArray(char[] sa, int sp,
- // byte[] da, int dp, int len) {
- // int i = 0;
- // for (; i < len; i++) {
- // char c = sa[sp++];
- // if (c >= '\u0080')
- // break;
- // da[dp++] = (byte)c;
- // }
- // return i;
- //}
+// Encode given char[]/byte[] to byte[] in ISO_8859_1 or ASCII
+//
+// @IntrinsicCandidate
+// int sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(
+// char[] sa, int sp, byte[] da, int dp, int len) {
+// int i = 0;
+// for (; i < len; i++) {
+// char c = sa[sp++];
+// if (c > '\u00FF')
+// break;
+// da[dp++] = (byte) c;
+// }
+// return i;
+// }
+//
+// @IntrinsicCandidate
+// int java.lang.StringCoding.encodeISOArray0(
+// byte[] sa, int sp, byte[] da, int dp, int len) {
+// int i = 0;
+// for (; i < len; i++) {
+// char c = StringUTF16.getChar(sa, sp++);
+// if (c > '\u00FF')
+// break;
+// da[dp++] = (byte) c;
+// }
+// return i;
+// }
+//
+// @IntrinsicCandidate
+// int java.lang.StringCoding.encodeAsciiArray0(
+// char[] sa, int sp, byte[] da, int dp, int len) {
+// int i = 0;
+// for (; i < len; i++) {
+// char c = sa[sp++];
+// if (c >= '\u0080')
+// break;
+// da[dp++] = (byte) c;
+// }
+// return i;
+// }
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
@@ -9141,7 +9210,7 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM
case T_FLOAT:
evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
case T_DOUBLE:
- evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
+ evminmaxpd(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}
@@ -9439,7 +9508,6 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
Label L_fill_zmm_sequence;
int shift = -1;
- int avx3threshold = VM_Version::avx3_threshold();
switch(type) {
case T_BYTE: shift = 0;
break;
@@ -9455,10 +9523,10 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
fatal("Unhandled type: %s\n", type2name(type));
}
- if ((avx3threshold != 0) || (MaxVectorSize == 32)) {
+ if ((CopyAVX3Threshold != 0) || (MaxVectorSize == 32)) {
if (MaxVectorSize == 64) {
- cmpq(count, avx3threshold >> shift);
+ cmpq(count, CopyAVX3Threshold >> shift);
jcc(Assembler::greater, L_fill_zmm_sequence);
}
@@ -9998,6 +10066,20 @@ void MacroAssembler::restore_legacy_gprs() {
addq(rsp, 16 * wordSize);
}
+void MacroAssembler::load_aotrc_address(Register reg, address a) {
+#if INCLUDE_CDS
+ assert(AOTRuntimeConstants::contains(a), "address out of range for data area");
+ if (AOTCodeCache::is_on_for_dump()) {
+ // all aotrc field addresses should be registered in the AOTCodeCache address table
+ lea(reg, ExternalAddress(a));
+ } else {
+ mov64(reg, (uint64_t)a);
+ }
+#else
+ ShouldNotReachHere();
+#endif
+}
+
void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) {
if (VM_Version::supports_apx_f()) {
esetzucc(comparison, dst);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 93e3529ac1e..021d2943ee8 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -162,6 +162,8 @@ class MacroAssembler: public Assembler {
void incrementq(AddressLiteral dst, Register rscratch = noreg);
+ void movhlf(XMMRegister dst, XMMRegister src, Register rscratch = noreg);
+
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (dst-> encoding() == src->encoding()) return;
@@ -351,8 +353,7 @@ class MacroAssembler: public Assembler {
void load_klass(Register dst, Register src, Register tmp);
void store_klass(Register dst, Register src, Register tmp);
- // Compares the Klass pointer of an object to a given Klass (which might be narrow,
- // depending on UseCompressedClassPointers).
+ // Compares the narrow Klass pointer of an object to a given narrow Klass.
void cmp_klass(Register klass, Register obj, Register tmp);
// Compares the Klass pointer of two objects obj1 and obj2. Result is in the condition flags.
@@ -1309,14 +1310,30 @@ public:
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+ void evucomish(XMMRegister dst, XMMRegister src) { Assembler::evucomish(dst, src); }
+ void evucomish(XMMRegister dst, Address src) { Assembler::evucomish(dst, src); }
+ void evucomish(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+
+ void evucomxsh(XMMRegister dst, XMMRegister src) { Assembler::evucomxsh(dst, src); }
+ void evucomxsh(XMMRegister dst, Address src) { Assembler::evucomxsh(dst, src); }
+ void evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+ void evucomxss(XMMRegister dst, XMMRegister src) { Assembler::evucomxss(dst, src); }
+ void evucomxss(XMMRegister dst, Address src) { Assembler::evucomxss(dst, src); }
+ void evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+ void evucomxsd(XMMRegister dst, XMMRegister src) { Assembler::evucomxsd(dst, src); }
+ void evucomxsd(XMMRegister dst, Address src) { Assembler::evucomxsd(dst, src); }
+ void evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
+
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
@@ -1861,6 +1878,9 @@ public:
void mov_metadata(Register dst, Metadata* obj);
void mov_metadata(Address dst, Metadata* obj, Register rscratch);
+ void mov64(Register dst, int64_t imm64);
+ void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
+
void movptr(Register dst, Register src);
void movptr(Register dst, Address src);
void movptr(Register dst, AddressLiteral src);
@@ -2062,6 +2082,7 @@ public:
void save_legacy_gprs();
void restore_legacy_gprs();
+ void load_aotrc_address(Register reg, address a);
void setcc(Assembler::Condition comparison, Register dst);
};
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
index 9f0232075cd..401d5dc22cc 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp
@@ -242,7 +242,6 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste
Label done_hash, loop0;
address K256 = StubRoutines::x86::k256_addr();
- address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
movdqu(state0, Address(state, 0));
movdqu(state1, Address(state, 16));
@@ -253,7 +252,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste
palignr(state0, state1, 8);
pblendw(state1, msgtmp4, 0xF0);
- movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
+ movdqu(shuf_mask, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr()));
lea(rax, ExternalAddress(K256));
bind(loop0);
@@ -661,8 +660,6 @@ void MacroAssembler::sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
compute_size1, compute_size_end1;
address K256_W = StubRoutines::x86::k256_W_addr();
- address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
- address pshuffle_byte_flip_mask_addr = nullptr;
const XMMRegister& SHUF_00BA = xmm10; // ymm10: shuffle xBxA -> 00BA
const XMMRegister& SHUF_DC00 = xmm12; // ymm12: shuffle xDxC -> DC00
@@ -791,10 +788,14 @@ enum {
// load g - r10 after it is used as scratch
movl(h, Address(CTX, 4*7));
- pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
- vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
- vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
- vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
+ // each of the three successive pshuffle_byte_flip_mask stub entries
+ // must be offset by 32 bytes from the previous entry
+ assert(StubRoutines::x86::pshuffle_byte_flip_mask_addr() + 32 == StubRoutines::x86::pshuffle_byte_flip_mask_00ba_addr(), "sanity");
+ assert(StubRoutines::x86::pshuffle_byte_flip_mask_addr() + 64 == StubRoutines::x86::pshuffle_byte_flip_mask_dc00_addr(), "sanity");
+
+ vmovdqu(BYTE_FLIP_MASK, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr())); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
+ vmovdqu(SHUF_00BA, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_00ba_addr())); // [_SHUF_00BA wrt rip]
+ vmovdqu(SHUF_DC00, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_dc00_addr())); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6));
@@ -953,11 +954,9 @@ bind(only_one_block);
// load g - r10 after use as scratch
movl(h, Address(CTX, 4*7)); // 0x5be0cd19
-
- pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask;
- vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
- vmovdqu(SHUF_00BA, ExternalAddress(pshuffle_byte_flip_mask_addr + 32)); // [_SHUF_00BA wrt rip]
- vmovdqu(SHUF_DC00, ExternalAddress(pshuffle_byte_flip_mask_addr + 64)); // [_SHUF_DC00 wrt rip]
+ vmovdqu(BYTE_FLIP_MASK, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr())); // [PSHUFFLE_BYTE_FLIP_MASK wrt rip]
+ vmovdqu(SHUF_00BA, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_00ba_addr())); // [_SHUF_00BA wrt rip]
+ vmovdqu(SHUF_DC00, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_dc00_addr())); // [_SHUF_DC00 wrt rip]
movl(g, Address(CTX, 4*6)); // 0x1f83d9ab
@@ -1346,9 +1345,12 @@ void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegiste
// load g - r10 after it is used as scratch
movq(h, Address(CTX, 8 * 7));
- pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512;
- vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); // PSHUFFLE_BYTE_FLIP_MASK wrt rip
- vmovdqu(YMM_MASK_LO, ExternalAddress(pshuffle_byte_flip_mask_addr + 32));
+ // the second pshuffle_byte_flip_mask_sha512 stub entry must be
+ // offset by 32 bytes from the first
+ assert(StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512() + 32 == StubRoutines::x86::pshuffle_byte_flip_mask_ymm_lo_addr_sha512(), "sanity");
+
+ vmovdqu(BYTE_FLIP_MASK, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512())); // PSHUFFLE_BYTE_FLIP_MASK wrt rip
+ vmovdqu(YMM_MASK_LO, ExternalAddress(StubRoutines::x86::pshuffle_byte_flip_mask_ymm_lo_addr_sha512())); // MASK_YMM_LO wrt rip
movq(g, Address(CTX, 8 * 6));
diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp
index f7973a8564e..62a5d2827bc 100644
--- a/src/hotspot/cpu/x86/matcher_x86.hpp
+++ b/src/hotspot/cpu/x86/matcher_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -75,7 +75,6 @@
}
static bool narrow_klass_use_complex_address() {
- assert(UseCompressedClassPointers, "only for compressed klass code");
return (CompressedKlassPointers::shift() <= 3);
}
diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp
index 54376c6ad9a..5b15444bc32 100644
--- a/src/hotspot/cpu/x86/methodHandles_x86.cpp
+++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp
@@ -110,14 +110,13 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Registe
__ andl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK);
__ cmpl(temp, ref_kind);
__ jcc(Assembler::equal, L);
- { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
- jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
- if (ref_kind == JVM_REF_invokeVirtual ||
- ref_kind == JVM_REF_invokeSpecial)
- // could do this for all ref_kinds, but would explode assembly code size
- trace_method_handle(_masm, buf);
- __ STOP(buf);
+ const char* msg = ref_kind_to_verify_msg(ref_kind);
+ if (ref_kind == JVM_REF_invokeVirtual ||
+ ref_kind == JVM_REF_invokeSpecial) {
+ // could do this for all ref_kinds, but would explode assembly code size
+ trace_method_handle(_masm, msg);
}
+ __ STOP(msg);
BLOCK_COMMENT("} verify_ref_kind");
__ bind(L);
}
diff --git a/src/hotspot/cpu/x86/stubDeclarations_x86.hpp b/src/hotspot/cpu/x86/stubDeclarations_x86.hpp
index 971c8fd3c44..24886deb3c5 100644
--- a/src/hotspot/cpu/x86/stubDeclarations_x86.hpp
+++ b/src/hotspot/cpu/x86/stubDeclarations_x86.hpp
@@ -29,14 +29,16 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 500) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, PRODUCT_ONLY(20000) NOT_PRODUCT(21000) WINDOWS_ONLY(+1000)) \
do_stub(initial, verify_mxcsr) \
do_arch_entry(x86, initial, verify_mxcsr, verify_mxcsr_entry, \
@@ -65,14 +67,18 @@
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 3000) \
+// count needed for declaration of vector_iota_indices stub
+#define VECTOR_IOTA_COUNT 6
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 120000 WINDOWS_ONLY(+2000)) \
do_stub(compiler, vector_float_sign_mask) \
do_arch_entry(x86, compiler, vector_float_sign_mask, \
@@ -126,8 +132,9 @@
do_arch_entry(x86, compiler, vector_long_sign_mask, \
vector_long_sign_mask, vector_long_sign_mask) \
do_stub(compiler, vector_iota_indices) \
- do_arch_entry(x86, compiler, vector_iota_indices, \
- vector_iota_indices, vector_iota_indices) \
+ do_arch_entry_array(x86, compiler, vector_iota_indices, \
+ vector_iota_indices, vector_iota_indices, \
+ VECTOR_IOTA_COUNT) \
do_stub(compiler, vector_count_leading_zeros_lut) \
do_arch_entry(x86, compiler, vector_count_leading_zeros_lut, \
vector_count_leading_zeros_lut, \
@@ -161,6 +168,12 @@
do_arch_entry(x86, compiler, pshuffle_byte_flip_mask, \
pshuffle_byte_flip_mask_addr, \
pshuffle_byte_flip_mask_addr) \
+ do_arch_entry(x86, compiler, pshuffle_byte_flip_mask, \
+ pshuffle_byte_flip_mask_00ba_addr, \
+ pshuffle_byte_flip_mask_00ba_addr) \
+ do_arch_entry(x86, compiler, pshuffle_byte_flip_mask, \
+ pshuffle_byte_flip_mask_dc00_addr, \
+ pshuffle_byte_flip_mask_dc00_addr) \
/* x86_64 exposes these 3 stubs via a generic entry array */ \
/* other arches use arch-specific entries */ \
/* this really needs rationalising */ \
@@ -171,6 +184,9 @@
do_arch_entry(x86, compiler, pshuffle_byte_flip_mask_sha512, \
pshuffle_byte_flip_mask_addr_sha512, \
pshuffle_byte_flip_mask_addr_sha512) \
+ do_arch_entry(x86, compiler, pshuffle_byte_flip_mask_sha512, \
+ pshuffle_byte_flip_mask_ymm_lo_addr_sha512, \
+ pshuffle_byte_flip_mask_ymm_lo_addr_sha512) \
do_stub(compiler, compress_perm_table32) \
do_arch_entry(x86, compiler, compress_perm_table32, \
compress_perm_table32, compress_perm_table32) \
@@ -241,7 +257,8 @@
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 33000 \
WINDOWS_ONLY(+22000) ZGC_ONLY(+20000)) \
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index efb0411aa39..993d1964034 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -188,8 +188,18 @@ address StubGenerator::generate_call_stub(address& return_address) {
(int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
"adjust this code");
StubId stub_id = StubId::stubgen_call_stub_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 2, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == 1, "expected 1 extra entry");
+ return_address = entries.at(0);
+ return start;
+ }
+
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// same as in generate_catch_exception()!
const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
@@ -298,6 +308,7 @@ address StubGenerator::generate_call_stub(address& return_address) {
BLOCK_COMMENT("call_stub_return_address:");
return_address = __ pc();
+ entries.append(return_address);
// store result depending on type (everything that is not
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
@@ -394,6 +405,9 @@ address StubGenerator::generate_call_stub(address& return_address) {
__ movdbl(Address(c_rarg0, 0), xmm0);
__ jmp(exit);
+ // record the stub entry and end plus the auxiliary entry
+ store_archive_data(stub_id, start, __ pc(), &entries);
+
return start;
}
@@ -411,8 +425,15 @@ address StubGenerator::generate_call_stub(address& return_address) {
address StubGenerator::generate_catch_exception() {
StubId stub_id = StubId::stubgen_catch_exception_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// same as in generate_call_stub():
const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
@@ -442,7 +463,9 @@ address StubGenerator::generate_catch_exception() {
__ verify_oop(rax);
__ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
- __ lea(rscratch1, ExternalAddress((address)__FILE__));
+ // special case -- add file name string to AOT address table
+ address file = (address)AOTCodeCache::add_C_string(__FILE__);
+ __ lea(rscratch1, ExternalAddress(file));
__ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
__ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);
@@ -451,6 +474,9 @@ address StubGenerator::generate_catch_exception() {
"_call_stub_return_address must have been generated before");
__ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -467,8 +493,14 @@ address StubGenerator::generate_catch_exception() {
address StubGenerator::generate_forward_exception() {
StubId stub_id = StubId::stubgen_forward_exception_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Upon entry, the sp points to the return address returning into
// Java (interpreted or compiled) code; i.e., the return address
@@ -521,6 +553,9 @@ address StubGenerator::generate_forward_exception() {
__ verify_oop(rax);
__ jmp(rbx);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -531,12 +566,21 @@ address StubGenerator::generate_forward_exception() {
// Result:
address StubGenerator::generate_orderaccess_fence() {
StubId stub_id = StubId::stubgen_fence_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ membar(Assembler::StoreLoad);
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -550,8 +594,14 @@ address StubGenerator::generate_orderaccess_fence() {
address StubGenerator::generate_verify_mxcsr() {
StubId stub_id = StubId::stubgen_verify_mxcsr_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Address mxcsr_save(rsp, 0);
@@ -574,15 +624,24 @@ address StubGenerator::generate_verify_mxcsr() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_f2i_fixup() {
StubId stub_id = StubId::stubgen_f2i_fixup_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
Address inout(rsp, 5 * wordSize); // return address + 4 saves
- address start = __ pc();
+ start = __ pc();
Label L;
@@ -613,14 +672,23 @@ address StubGenerator::generate_f2i_fixup() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_f2l_fixup() {
StubId stub_id = StubId::stubgen_f2l_fixup_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
Address inout(rsp, 5 * wordSize); // return address + 4 saves
- address start = __ pc();
+ start = __ pc();
Label L;
@@ -651,15 +719,24 @@ address StubGenerator::generate_f2l_fixup() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_d2i_fixup() {
StubId stub_id = StubId::stubgen_d2i_fixup_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
Address inout(rsp, 6 * wordSize); // return address + 5 saves
- address start = __ pc();
+ start = __ pc();
Label L;
@@ -699,15 +776,24 @@ address StubGenerator::generate_d2i_fixup() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_d2l_fixup() {
StubId stub_id = StubId::stubgen_d2l_fixup_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
Address inout(rsp, 6 * wordSize); // return address + 5 saves
- address start = __ pc();
+ start = __ pc();
Label L;
@@ -747,14 +833,23 @@ address StubGenerator::generate_d2l_fixup() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_count_leading_zeros_lut() {
- __ align64();
StubId stub_id = StubId::stubgen_vector_count_leading_zeros_lut_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
@@ -765,14 +860,23 @@ address StubGenerator::generate_count_leading_zeros_lut() {
__ emit_data64(0x0101010102020304, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_popcount_avx_lut() {
- __ align64();
StubId stub_id = StubId::stubgen_vector_popcount_lut_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0302020102010100, relocInfo::none);
__ emit_data64(0x0403030203020201, relocInfo::none);
@@ -783,14 +887,30 @@ address StubGenerator::generate_popcount_avx_lut() {
__ emit_data64(0x0302020102010100, relocInfo::none);
__ emit_data64(0x0403030203020201, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
-address StubGenerator::generate_iota_indices() {
- __ align(CodeEntryAlignment);
+void StubGenerator::generate_iota_indices() {
StubId stub_id = StubId::stubgen_vector_iota_indices_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == VECTOR_IOTA_COUNT, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == VECTOR_IOTA_COUNT - 1,
+ "unexpected extra entry count %d", entries.length());
+ StubRoutines::x86::_vector_iota_indices[0] = start;
+ for (int i = 1; i < VECTOR_IOTA_COUNT; i++) {
+ StubRoutines::x86::_vector_iota_indices[i] = entries.at(i - 1);
+ }
+ return;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// B
__ emit_data64(0x0706050403020100, relocInfo::none);
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
@@ -800,6 +920,7 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
__ emit_data64(0x3736353433323130, relocInfo::none);
__ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
+ entries.append(__ pc());
// W
__ emit_data64(0x0003000200010000, relocInfo::none);
__ emit_data64(0x0007000600050004, relocInfo::none);
@@ -809,6 +930,7 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x0017001600150014, relocInfo::none);
__ emit_data64(0x001B001A00190018, relocInfo::none);
__ emit_data64(0x001F001E001D001C, relocInfo::none);
+ entries.append(__ pc());
// D
__ emit_data64(0x0000000100000000, relocInfo::none);
__ emit_data64(0x0000000300000002, relocInfo::none);
@@ -818,6 +940,7 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x0000000B0000000A, relocInfo::none);
__ emit_data64(0x0000000D0000000C, relocInfo::none);
__ emit_data64(0x0000000F0000000E, relocInfo::none);
+ entries.append(__ pc());
// Q
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000001, relocInfo::none);
@@ -827,6 +950,7 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x0000000000000005, relocInfo::none);
__ emit_data64(0x0000000000000006, relocInfo::none);
__ emit_data64(0x0000000000000007, relocInfo::none);
+ entries.append(__ pc());
// D - FP
__ emit_data64(0x3F80000000000000, relocInfo::none); // 0.0f, 1.0f
__ emit_data64(0x4040000040000000, relocInfo::none); // 2.0f, 3.0f
@@ -836,6 +960,7 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x4130000041200000, relocInfo::none); // 10.0f, 11.0f
__ emit_data64(0x4150000041400000, relocInfo::none); // 12.0f, 13.0f
__ emit_data64(0x4170000041600000, relocInfo::none); // 14.0f, 15.0f
+ entries.append(__ pc());
// Q - FP
__ emit_data64(0x0000000000000000, relocInfo::none); // 0.0d
__ emit_data64(0x3FF0000000000000, relocInfo::none); // 1.0d
@@ -845,14 +970,30 @@ address StubGenerator::generate_iota_indices() {
__ emit_data64(0x4014000000000000, relocInfo::none); // 5.0d
__ emit_data64(0x4018000000000000, relocInfo::none); // 6.0d
__ emit_data64(0x401c000000000000, relocInfo::none); // 7.0d
- return start;
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc(), &entries);
+
+ // install the entry addresses in the entry array
+ assert(entries.length() == entry_count - 1,
+ "unexpected entries count %d", entries.length());
+ StubRoutines::x86::_vector_iota_indices[0] = start;
+ for (int i = 1; i < VECTOR_IOTA_COUNT; i++) {
+ StubRoutines::x86::_vector_iota_indices[i] = entries.at(i - 1);
+ }
}
address StubGenerator::generate_vector_reverse_bit_lut() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_reverse_bit_lut_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
@@ -863,14 +1004,23 @@ address StubGenerator::generate_vector_reverse_bit_lut() {
__ emit_data64(0x0E060A020C040800, relocInfo::none);
__ emit_data64(0x0F070B030D050901, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_reverse_byte_perm_mask_long() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_reverse_byte_perm_mask_long_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
@@ -881,14 +1031,23 @@ address StubGenerator::generate_vector_reverse_byte_perm_mask_long() {
__ emit_data64(0x0001020304050607, relocInfo::none);
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_reverse_byte_perm_mask_int() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_reverse_byte_perm_mask_int_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
@@ -899,14 +1058,23 @@ address StubGenerator::generate_vector_reverse_byte_perm_mask_int() {
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_reverse_byte_perm_mask_short() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_reverse_byte_perm_mask_short_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
@@ -917,31 +1085,52 @@ address StubGenerator::generate_vector_reverse_byte_perm_mask_short() {
__ emit_data64(0x0607040502030001, relocInfo::none);
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_byte_shuffle_mask() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_byte_shuffle_mask_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x7070707070707070, relocInfo::none);
__ emit_data64(0x7070707070707070, relocInfo::none);
__ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
__ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_fp_mask(StubId stub_id, int64_t mask) {
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64( mask, relocInfo::none );
__ emit_data64( mask, relocInfo::none );
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -957,9 +1146,15 @@ address StubGenerator::generate_compress_perm_table(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
if (esize == 32) {
// Loop to generate 256 x 8 int compression permute index table. A row is
// accessed using 8 bit index computed using vector mask. An entry in
@@ -997,6 +1192,9 @@ address StubGenerator::generate_compress_perm_table(StubId stub_id) {
}
}
}
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1012,9 +1210,15 @@ address StubGenerator::generate_expand_perm_table(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
if (esize == 32) {
// Loop to generate 256 x 8 int expand permute index table. A row is accessed
// using 8 bit index computed using vector mask. An entry in a row holds either
@@ -1050,13 +1254,22 @@ address StubGenerator::generate_expand_perm_table(StubId stub_id) {
}
}
}
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_mask(StubId stub_id, int64_t mask) {
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
@@ -1067,14 +1280,23 @@ address StubGenerator::generate_vector_mask(StubId stub_id, int64_t mask) {
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_byte_perm_mask() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vector_byte_perm_mask_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0000000000000001, relocInfo::none);
__ emit_data64(0x0000000000000003, relocInfo::none);
@@ -1085,13 +1307,22 @@ address StubGenerator::generate_vector_byte_perm_mask() {
__ emit_data64(0x0000000000000004, relocInfo::none);
__ emit_data64(0x0000000000000006, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_vector_fp_mask(StubId stub_id, int64_t mask) {
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
@@ -1102,6 +1333,9 @@ address StubGenerator::generate_vector_fp_mask(StubId stub_id, int64_t mask) {
__ emit_data64(mask, relocInfo::none);
__ emit_data64(mask, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1110,9 +1344,15 @@ address StubGenerator::generate_vector_custom_i32(StubId stub_id, Assembler::Avx
int32_t val4, int32_t val5, int32_t val6, int32_t val7,
int32_t val8, int32_t val9, int32_t val10, int32_t val11,
int32_t val12, int32_t val13, int32_t val14, int32_t val15) {
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(len != Assembler::AVX_NoVec, "vector len must be specified");
__ emit_data(val0, relocInfo::none, 0);
@@ -1135,6 +1375,9 @@ address StubGenerator::generate_vector_custom_i32(StubId stub_id, Assembler::Avx
__ emit_data(val15, relocInfo::none, 0);
}
}
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1156,8 +1399,14 @@ address StubGenerator::generate_vector_custom_i32(StubId stub_id, Assembler::Avx
// * = popped on exit
address StubGenerator::generate_verify_oop() {
StubId stub_id = StubId::stubgen_verify_oop_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label exit, error;
@@ -1235,6 +1484,9 @@ address StubGenerator::generate_verify_oop() {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
__ hlt();
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1350,35 +1602,46 @@ void StubGenerator::restore_argument_regs(BasicType type) {
address StubGenerator::generate_data_cache_writeback() {
const Register src = c_rarg0; // source address
-
- __ align(CodeEntryAlignment);
-
StubId stub_id = StubId::stubgen_data_cache_writeback_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
__ cache_wb(Address(src, 0));
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_data_cache_writeback_sync() {
const Register is_pre = c_rarg0; // pre or post sync
-
- __ align(CodeEntryAlignment);
-
StubId stub_id = StubId::stubgen_data_cache_writeback_sync_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
// pre wbsync is a no-op
// post wbsync translates to an sfence
Label skip;
- address start = __ pc();
+ start = __ pc();
__ enter();
__ cmpl(is_pre, 0);
@@ -1388,6 +1651,9 @@ address StubGenerator::generate_data_cache_writeback_sync() {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1405,9 +1671,15 @@ address StubGenerator::generate_md5_implCompress(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register buf_param = r15;
const Address state_param(rsp, 0 * wordSize);
@@ -1437,30 +1709,51 @@ address StubGenerator::generate_md5_implCompress(StubId stub_id) {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_upper_word_mask() {
- __ align64();
StubId stub_id = StubId::stubgen_upper_word_mask_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0xFFFFFFFF00000000, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_shuffle_byte_flip_mask() {
- __ align64();
StubId stub_id = StubId::stubgen_shuffle_byte_flip_mask_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
__ emit_data64(0x0001020304050607, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1478,9 +1771,15 @@ address StubGenerator::generate_sha1_implCompress(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -1509,15 +1808,32 @@ address StubGenerator::generate_sha1_implCompress(StubId stub_id) {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
-address StubGenerator::generate_pshuffle_byte_flip_mask() {
- __ align64();
+address StubGenerator::generate_pshuffle_byte_flip_mask(address& entry_00ba, address& entry_dc00) {
StubId stub_id = StubId::stubgen_pshuffle_byte_flip_mask_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 3, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == entry_count - 1,
+ "unexpected extra entry count %d", entries.length());
+ entry_00ba = entries.at(0);
+ entry_dc00 = entries.at(1);
+ assert(VM_Version::supports_avx2() == (entry_00ba != nullptr && entry_dc00 != nullptr),
+ "entries cannot be null when avx2 is enabled");
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
-
+ start = __ pc();
+ address entry2 = nullptr;
+ address entry3 = nullptr;
__ emit_data64(0x0405060700010203, relocInfo::none);
__ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
@@ -1525,37 +1841,66 @@ address StubGenerator::generate_pshuffle_byte_flip_mask() {
__ emit_data64(0x0405060700010203, relocInfo::none); // second copy
__ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
// _SHUF_00BA
+ entry2 = __ pc();
__ emit_data64(0x0b0a090803020100, relocInfo::none);
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
__ emit_data64(0x0b0a090803020100, relocInfo::none);
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
// _SHUF_DC00
+ entry3 = __ pc();
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
__ emit_data64(0x0b0a090803020100, relocInfo::none);
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
__ emit_data64(0x0b0a090803020100, relocInfo::none);
}
+ // have to track the 2nd and 3rd entries even if they are null
+ entry_00ba = entry2;
+ entries.push(entry_00ba);
+ entry_dc00 = entry3;
+ entries.push(entry_dc00);
+
+ // record the stub entry and end plus all the auxiliary entries
+ store_archive_data(stub_id, start, __ pc(), &entries);
return start;
}
//Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
-address StubGenerator::generate_pshuffle_byte_flip_mask_sha512() {
- __ align32();
+address StubGenerator::generate_pshuffle_byte_flip_mask_sha512(address& entry_ymm_lo) {
StubId stub_id = StubId::stubgen_pshuffle_byte_flip_mask_sha512_id;
+ GrowableArray entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 2, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == entry_count - 1,
+ "unexpected extra entry count %d", entries.length());
+ entry_ymm_lo = entries.at(0);
+ assert(VM_Version::supports_avx2() == (entry_ymm_lo != nullptr),
+ "entry cannot be null when avx2 is enabled");
+ return start;
+ }
+ __ align32();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
-
+ start = __ pc();
+ address entry2 = nullptr;
if (VM_Version::supports_avx2()) {
__ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
__ emit_data64(0x1011121314151617, relocInfo::none);
__ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none);
+ // capture 2nd entry
+ entry2 = __ pc();
__ emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
__ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
}
+ // have to track the 2nd entry even if it is null
+ entry_ymm_lo = entry2;
+ entries.push(entry2);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc(), &entries);
return start;
}
@@ -1575,9 +1920,15 @@ address StubGenerator::generate_sha256_implCompress(StubId stub_id) {
ShouldNotReachHere();
}
assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "");
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -1612,6 +1963,9 @@ address StubGenerator::generate_sha256_implCompress(StubId stub_id) {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1629,9 +1983,15 @@ address StubGenerator::generate_sha512_implCompress(StubId stub_id) {
}
assert(VM_Version::supports_avx2(), "");
assert(VM_Version::supports_bmi2() || VM_Version::supports_sha512(), "");
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Register buf = c_rarg0;
Register state = c_rarg1;
@@ -1660,14 +2020,23 @@ address StubGenerator::generate_sha512_implCompress(StubId stub_id) {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_shuffle_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_shuffle_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -1680,42 +2049,69 @@ address StubGenerator::base64_shuffle_addr() {
__ emit_data64(0x2829272825262425, relocInfo::none);
__ emit_data64(0x2e2f2d2e2b2c2a2b, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_avx2_shuffle_addr() {
- __ align32();
StubId stub_id = StubId::stubgen_avx2_shuffle_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align32();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x0809070805060405, relocInfo::none);
__ emit_data64(0x0e0f0d0e0b0c0a0b, relocInfo::none);
__ emit_data64(0x0405030401020001, relocInfo::none);
__ emit_data64(0x0a0b090a07080607, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_avx2_input_mask_addr() {
- __ align32();
StubId stub_id = StubId::stubgen_avx2_input_mask_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align32();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0x8000000000000000, relocInfo::none);
__ emit_data64(0x8000000080000000, relocInfo::none);
__ emit_data64(0x8000000080000000, relocInfo::none);
__ emit_data64(0x8000000080000000, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_avx2_lut_addr() {
- __ align32();
StubId stub_id = StubId::stubgen_avx2_lut_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align32();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
__ emit_data64(0x0000f0edfcfcfcfc, relocInfo::none);
@@ -1728,14 +2124,23 @@ address StubGenerator::base64_avx2_lut_addr() {
__ emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
__ emit_data64(0x000020effcfcfcfc, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_encoding_table_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_encoding_table_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0, "Alignment problem (0x%08llx)", (unsigned long long)start);
__ emit_data64(0x4847464544434241, relocInfo::none);
@@ -1757,6 +2162,9 @@ address StubGenerator::base64_encoding_table_addr() {
__ emit_data64(0x333231307a797877, relocInfo::none);
__ emit_data64(0x5f2d393837363534, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1766,10 +2174,16 @@ address StubGenerator::base64_encoding_table_addr() {
// boolean isURL) {
address StubGenerator::generate_base64_encodeBlock()
{
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_base64_encodeBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
@@ -2144,15 +2558,24 @@ address StubGenerator::generate_base64_encodeBlock()
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// base64 AVX512vbmi tables
address StubGenerator::base64_vbmi_lookup_lo_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_lookup_lo_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2165,14 +2588,23 @@ address StubGenerator::base64_vbmi_lookup_lo_addr() {
__ emit_data64(0x3b3a393837363534, relocInfo::none);
__ emit_data64(0x8080808080803d3c, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_lookup_hi_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_lookup_hi_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2185,13 +2617,22 @@ address StubGenerator::base64_vbmi_lookup_hi_addr() {
__ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
__ emit_data64(0x8080808080333231, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_lookup_lo_url_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_lookup_lo_base64url_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2204,14 +2645,23 @@ address StubGenerator::base64_vbmi_lookup_lo_url_addr() {
__ emit_data64(0x3b3a393837363534, relocInfo::none);
__ emit_data64(0x8080808080803d3c, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_lookup_hi_url_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_lookup_hi_base64url_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2224,14 +2674,23 @@ address StubGenerator::base64_vbmi_lookup_hi_url_addr() {
__ emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
__ emit_data64(0x8080808080333231, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_pack_vec_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_pack_vec_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2244,14 +2703,23 @@ address StubGenerator::base64_vbmi_pack_vec_addr() {
__ emit_data64(0x0000000000000000, relocInfo::none);
__ emit_data64(0x0000000000000000, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_join_0_1_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_join_0_1_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2264,14 +2732,23 @@ address StubGenerator::base64_vbmi_join_0_1_addr() {
__ emit_data64(0x494a444546404142, relocInfo::none);
__ emit_data64(0x565051524c4d4e48, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_join_1_2_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_join_1_2_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2284,14 +2761,23 @@ address StubGenerator::base64_vbmi_join_1_2_addr() {
__ emit_data64(0x5c5d5e58595a5455, relocInfo::none);
__ emit_data64(0x696a646566606162, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_vbmi_join_2_3_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_join_2_3_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2304,14 +2790,23 @@ address StubGenerator::base64_vbmi_join_2_3_addr() {
__ emit_data64(0x767071726c6d6e68, relocInfo::none);
__ emit_data64(0x7c7d7e78797a7475, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_AVX2_decode_tables_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_avx2_decode_tables_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2339,14 +2834,23 @@ address StubGenerator::base64_AVX2_decode_tables_addr() {
// merge multiplier
__ emit_data(0x00011000, relocInfo::none, 0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_AVX2_decode_LUT_tables_addr() {
- __ align64();
StubId stub_id = StubId::stubgen_avx2_decode_lut_tables_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align64();
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
assert(((unsigned long long)start & 0x3f) == 0,
"Alignment problem (0x%08llx)", (unsigned long long)start);
@@ -2380,13 +2884,22 @@ address StubGenerator::base64_AVX2_decode_LUT_tables_addr() {
__ emit_data64(0x0804080402011010, relocInfo::none);
__ emit_data64(0x1010101010101010, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::base64_decoding_table_addr() {
StubId stub_id = StubId::stubgen_decoding_table_base64_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ emit_data64(0xffffffffffffffff, relocInfo::none);
__ emit_data64(0xffffffffffffffff, relocInfo::none);
@@ -2455,6 +2968,9 @@ address StubGenerator::base64_decoding_table_addr() {
__ emit_data64(0xffffffffffffffff, relocInfo::none);
__ emit_data64(0xffffffffffffffff, relocInfo::none);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2466,10 +2982,16 @@ address StubGenerator::base64_decoding_table_addr() {
// Intrinsic function prototype in Base64.java:
// private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, isMIME) {
address StubGenerator::generate_base64_decodeBlock() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_base64_decodeBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
@@ -2982,6 +3504,9 @@ address StubGenerator::generate_base64_decodeBlock() {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3000,11 +3525,17 @@ address StubGenerator::generate_base64_decodeBlock() {
address StubGenerator::generate_updateBytesCRC32() {
assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesCRC32_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
// Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
@@ -3039,6 +3570,9 @@ address StubGenerator::generate_updateBytesCRC32() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3057,10 +3591,16 @@ address StubGenerator::generate_updateBytesCRC32() {
*/
address StubGenerator::generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
assert(UseCRC32CIntrinsics, "need SSE4_2");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesCRC32C_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
//reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs
//Windows RCX RDX R8 R9 none none XMM0..XMM3
@@ -3120,6 +3660,9 @@ address StubGenerator::generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3138,10 +3681,16 @@ address StubGenerator::generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
* rsp+40 - z address
*/
address StubGenerator::generate_multiplyToLen() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_multiplyToLen_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
// Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
@@ -3179,6 +3728,9 @@ address StubGenerator::generate_multiplyToLen() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3195,10 +3747,16 @@ address StubGenerator::generate_multiplyToLen() {
* rax - int >= mismatched index, < 0 bitwise complement of tail
*/
address StubGenerator::generate_vectorizedMismatch() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_vectorizedMismatch_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
BLOCK_COMMENT("Entry:");
__ enter();
@@ -3232,6 +3790,9 @@ address StubGenerator::generate_vectorizedMismatch() {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3247,10 +3808,16 @@ address StubGenerator::generate_vectorizedMismatch() {
*/
address StubGenerator::generate_squareToLen() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_squareToLen_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
// Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
@@ -3279,14 +3846,23 @@ address StubGenerator::generate_squareToLen() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_method_entry_barrier() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_method_entry_barrier_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label deoptimize_label;
@@ -3356,6 +3932,9 @@ address StubGenerator::generate_method_entry_barrier() {
__ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
__ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3373,10 +3952,16 @@ address StubGenerator::generate_method_entry_barrier() {
* rsp+40 - k
*/
address StubGenerator::generate_mulAdd() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_mulAdd_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
// Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
@@ -3411,14 +3996,23 @@ address StubGenerator::generate_mulAdd() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_bigIntegerRightShift() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_bigIntegerRightShiftWorker_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
// For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
@@ -3534,6 +4128,9 @@ address StubGenerator::generate_bigIntegerRightShift() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3551,10 +4148,16 @@ address StubGenerator::generate_bigIntegerRightShift() {
* rsp40 - numIter
*/
address StubGenerator::generate_bigIntegerLeftShift() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_bigIntegerLeftShiftWorker_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
// For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
@@ -3659,6 +4262,9 @@ address StubGenerator::generate_bigIntegerLeftShift() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3708,9 +4314,15 @@ void StubGenerator::generate_libm_stubs() {
*/
address StubGenerator::generate_float16ToFloat() {
StubId stub_id = StubId::stubgen_hf2f_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
BLOCK_COMMENT("Entry:");
// No need for RuntimeStub frame since it is called only during JIT compilation
@@ -3720,6 +4332,9 @@ address StubGenerator::generate_float16ToFloat() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3734,9 +4349,15 @@ address StubGenerator::generate_float16ToFloat() {
*/
address StubGenerator::generate_floatToFloat16() {
StubId stub_id = StubId::stubgen_f2hf_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
BLOCK_COMMENT("Entry:");
// No need for RuntimeStub frame since it is called only during JIT compilation
@@ -3746,6 +4367,9 @@ address StubGenerator::generate_floatToFloat16() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3775,8 +4399,14 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// TODO: Handle Valhalla return types. May require generating different return barriers.
@@ -3889,6 +4519,9 @@ address StubGenerator::generate_cont_thaw(StubId stub_id) {
__ ret(0);
}
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3909,8 +4542,14 @@ address StubGenerator::generate_cont_returnBarrier_exception() {
address StubGenerator::generate_cont_preempt_stub() {
if (!Continuations::enabled()) return nullptr;
StubId stub_id = StubId::stubgen_cont_preempt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ reset_last_Java_frame(true);
@@ -3934,14 +4573,23 @@ address StubGenerator::generate_cont_preempt_stub() {
__ movptr(rscratch1, ExternalAddress(ContinuationEntry::thaw_call_pc_address()));
__ jmp(rscratch1);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// exception handler for upcall stubs
address StubGenerator::generate_upcall_stub_exception_handler() {
StubId stub_id = StubId::stubgen_upcall_stub_exception_handler_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// native caller has no idea how to handle exceptions
// we just crash here. Up to callee to catch exceptions.
@@ -3953,6 +4601,9 @@ address StubGenerator::generate_upcall_stub_exception_handler() {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, UpcallLinker::handle_uncaught_exception)));
__ should_not_reach_here();
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -3961,8 +4612,14 @@ address StubGenerator::generate_upcall_stub_exception_handler() {
// rbx = result
address StubGenerator::generate_upcall_stub_load_target() {
StubId stub_id = StubId::stubgen_upcall_stub_load_target_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ resolve_global_jobject(j_rarg0, rscratch1);
// Load target method from receiver
@@ -3976,11 +4633,27 @@ address StubGenerator::generate_upcall_stub_load_target() {
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
void StubGenerator::generate_lookup_secondary_supers_table_stub() {
StubId stub_id = StubId::stubgen_lookup_secondary_supers_table_id;
+  GrowableArray<address> entries;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == Klass::SECONDARY_SUPERS_TABLE_SIZE, "sanity check");
+ address start = load_archive_data(stub_id, &entries);
+ if (start != nullptr) {
+ assert(entries.length() == Klass::SECONDARY_SUPERS_TABLE_SIZE - 1,
+ "unexpected extra entry count %d", entries.length());
+ StubRoutines::_lookup_secondary_supers_table_stubs[0] = start;
+ for (int slot = 1; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
+ StubRoutines::_lookup_secondary_supers_table_stubs[slot] = entries.at(slot - 1);
+ }
+ return;
+ }
StubCodeMark mark(this, stub_id);
const Register
@@ -3989,21 +4662,35 @@ void StubGenerator::generate_lookup_secondary_supers_table_stub() {
result = rdi;
for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
- StubRoutines::_lookup_secondary_supers_table_stubs[slot] = __ pc();
+ address next_entry = __ pc();
+ if (slot == 0) {
+ start = next_entry;
+ } else {
+ entries.append(next_entry);
+ }
+ StubRoutines::_lookup_secondary_supers_table_stubs[slot] = next_entry;
__ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass,
rdx, rcx, rbx, r11, // temps
result,
slot);
__ ret(0);
}
+
+ // record the stub entry and end plus all the auxiliary entries
+ store_archive_data(stub_id, start, __ pc(), &entries);
}
// Slow path implementation for UseSecondarySupersTable.
address StubGenerator::generate_lookup_secondary_supers_table_slow_path_stub() {
StubId stub_id = StubId::stubgen_lookup_secondary_supers_table_slow_path_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
-
- address start = __ pc();
+ start = __ pc();
const Register
r_super_klass = rax,
@@ -4025,6 +4712,9 @@ address StubGenerator::generate_lookup_secondary_supers_table_slow_path_stub() {
__ movl(result, 0);
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4165,7 +4855,7 @@ void StubGenerator::generate_compiler_stubs() {
StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask(StubId::stubgen_vector_short_shuffle_mask_id, 0x0100010001000100);
StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask(StubId::stubgen_vector_long_shuffle_mask_id, 0x0000000100000000);
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask(StubId::stubgen_vector_long_sign_mask_id, 0x8000000000000000);
- StubRoutines::x86::_vector_iota_indices = generate_iota_indices();
+ generate_iota_indices();
StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut();
StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut();
StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long();
@@ -4232,6 +4922,8 @@ void StubGenerator::generate_compiler_stubs() {
}
if (UseSHA256Intrinsics) {
+ address entry2 = nullptr;
+ address entry3 = nullptr;
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
char* dst = (char*)StubRoutines::x86::_k256_W;
char* src = (char*)StubRoutines::x86::_k256;
@@ -4240,14 +4932,18 @@ void StubGenerator::generate_compiler_stubs() {
memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
}
StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
- StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
+ StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(entry2, entry3);
+ StubRoutines::x86::_pshuffle_byte_flip_mask_00ba_addr = entry2;
+ StubRoutines::x86::_pshuffle_byte_flip_mask_dc00_addr = entry3;
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(StubId::stubgen_sha256_implCompress_id);
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(StubId::stubgen_sha256_implCompressMB_id);
}
if (UseSHA512Intrinsics) {
+ address entry2 = nullptr;
StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
- StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
+ StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512(entry2);
+ StubRoutines::x86::_pshuffle_byte_flip_mask_ymm_lo_addr_sha512 = entry2;
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(StubId::stubgen_sha512_implCompress_id);
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(StubId::stubgen_sha512_implCompressMB_id);
}
@@ -4325,7 +5021,7 @@ void StubGenerator::generate_compiler_stubs() {
#endif // COMPILER2_OR_JVMCI
}
-StubGenerator::StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+StubGenerator::StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -4348,8 +5044,35 @@ StubGenerator::StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerat
};
}
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+#if INCLUDE_CDS
+// publish addresses of static data defined in this file and in other
+// stubgen stub generator files
+void StubGenerator::init_AOTAddressTable(GrowableArray<address>& external_addresses) {
+ init_AOTAddressTable_adler(external_addresses);
+ init_AOTAddressTable_aes(external_addresses);
+ init_AOTAddressTable_cbrt(external_addresses);
+ init_AOTAddressTable_chacha(external_addresses);
+ // constants publishes for all of address use by cos and almost all of sin
+ init_AOTAddressTable_constants(external_addresses);
+ init_AOTAddressTable_dilithium(external_addresses);
+ init_AOTAddressTable_exp(external_addresses);
+ init_AOTAddressTable_fmod(external_addresses);
+ init_AOTAddressTable_ghash(external_addresses);
+ init_AOTAddressTable_kyber(external_addresses);
+ init_AOTAddressTable_log(external_addresses);
+ init_AOTAddressTable_poly1305(external_addresses);
+ init_AOTAddressTable_poly_mont(external_addresses);
+ init_AOTAddressTable_pow(external_addresses);
+ init_AOTAddressTable_sha3(external_addresses);
+ init_AOTAddressTable_sin(external_addresses);
+ init_AOTAddressTable_sinh(external_addresses);
+ init_AOTAddressTable_tan(external_addresses);
+ init_AOTAddressTable_tanh(external_addresses);
+}
+#endif // INCLUDE_CDS
+
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
#undef __
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index 36315535d16..d3823cb559f 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -84,7 +84,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_count_leading_zeros_lut();
address generate_popcount_avx_lut();
- address generate_iota_indices();
+ void generate_iota_indices();
address generate_vector_reverse_bit_lut();
address generate_vector_reverse_byte_perm_mask_long();
@@ -166,12 +166,12 @@ class StubGenerator: public StubCodeGenerator {
// - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
// for both special cases (various small block sizes) and aligned copy loop. This is the
// default configuration.
- // - If copy length is above AVX3Threshold, then implementation use 64 byte vectors (ZMMs)
+ // - If copy length is above CopyAVX3Threshold, then implementation use 64 byte vectors (ZMMs)
// for main copy loop (and subsequent tail) since bulk of the cycles will be consumed in it.
// - If user forces MaxVectorSize=32 then above 4096 bytes its seen that REP MOVs shows a
// better performance for disjoint copies. For conjoint/backward copy vector based
// copy performs better.
- // - If user sets AVX3Threshold=0, then special cases for small blocks sizes operate over
+ // - If user sets CopyAVX3Threshold=0, then special cases for small blocks sizes operate over
// 64 byte vector registers (ZMMs).
address generate_disjoint_copy_avx3_masked(StubId stub_id, address* entry);
@@ -303,11 +303,11 @@ class StubGenerator: public StubCodeGenerator {
address generate_sha512_implCompress(StubId stub_id);
// Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
- address generate_pshuffle_byte_flip_mask_sha512();
+ address generate_pshuffle_byte_flip_mask_sha512(address& entry_ymm_lo);
address generate_upper_word_mask();
address generate_shuffle_byte_flip_mask();
- address generate_pshuffle_byte_flip_mask();
+  address generate_pshuffle_byte_flip_mask(address& entry_00ba, address& entry_dc00);
// AES intrinsic stubs
@@ -330,6 +330,19 @@ class StubGenerator: public StubCodeGenerator {
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
+ // Shared implementation for ECB/AES Encrypt and Decrypt, which does 4 blocks
+ // in a loop at a time to hide instruction latency. Set is_encrypt=true for
+ // encryption, false for decryption.
+ address generate_electronicCodeBook_AESCrypt_Parallel(bool is_encrypt);
+
+ // A version of ECB/AES Encrypt which does 4 blocks in a loop at a time
+ // to hide instruction latency
+ address generate_electronicCodeBook_encryptAESCrypt_Parallel();
+
+ // A version of ECB/AES Decrypt which does 4 blocks in a loop at a time
+ // to hide instruction latency
+ address generate_electronicCodeBook_decryptAESCrypt_Parallel();
+
// Vector AES Galois Counter Mode implementation
address generate_galoisCounterMode_AESCrypt();
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
@@ -637,8 +650,33 @@ class StubGenerator: public StubCodeGenerator {
void generate_compiler_stubs();
void generate_final_stubs();
+#if INCLUDE_CDS
+ static void init_AOTAddressTable_adler(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_aes(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_cbrt(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_chacha(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_constants(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_dilithium(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_exp(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_fmod(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_ghash(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_kyber(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_log(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_poly1305(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_poly_mont(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_pow(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_sha3(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_sin(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_sinh(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_tan(GrowableArray& external_addresses);
+ static void init_AOTAddressTable_tanh(GrowableArray& external_addresses);
+#endif // INCLUDE_CDS
+
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id);
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData* stub_data);
+#if INCLUDE_CDS
+ static void init_AOTAddressTable(GrowableArray& external_addresses);
+#endif // INCLUDE_CDS
};
#endif // CPU_X86_STUBGENERATOR_X86_64_HPP
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_adler.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_adler.cpp
index 2799997a761..a9424978e0e 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_adler.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_adler.cpp
@@ -67,8 +67,14 @@ address StubGenerator::generate_updateBytesAdler32() {
__ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_updateBytesAdler32_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// Choose an appropriate LIMIT for inner loop based on the granularity
// of intermediate results. For int, LIMIT of 5552 will ensure intermediate
@@ -144,7 +150,7 @@ address StubGenerator::generate_updateBytesAdler32() {
__ align32();
if (VM_Version::supports_avx512vl()) {
// AVX2 performs better for smaller inputs because of leaner post loop reduction sequence..
- __ cmpl(s, MAX2(128, VM_Version::avx3_threshold()));
+ __ cmpl(s, MAX2(128, CopyAVX3Threshold));
__ jcc(Assembler::belowEqual, SLOOP1A_AVX2);
__ lea(end, Address(s, data, Address::times_1, - (2*CHUNKSIZE -1)));
@@ -334,7 +340,19 @@ address StubGenerator::generate_updateBytesAdler32() {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_adler(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ ADD(ADLER32_ASCALE_TABLE);
+ ADD(ADLER32_SHUF0_TABLE);
+ ADD(ADLER32_SHUF1_TABLE);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
index 24de32a6fe7..b95aa5f8818 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2019, 2025, Intel Corporation. All rights reserved.
+* Copyright (c) 2019, 2026, Intel Corporation. All rights reserved.
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -218,7 +218,9 @@ void StubGenerator::generate_aes_stubs() {
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
} else {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
- if (VM_Version::supports_avx2()) {
+ StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt_Parallel();
+ StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt_Parallel();
+ if (VM_Version::supports_avx2() && VM_Version::supports_clmul()) {
StubRoutines::_galoisCounterMode_AESCrypt = generate_avx2_galoisCounterMode_AESCrypt();
}
}
@@ -248,10 +250,16 @@ void StubGenerator::generate_aes_stubs() {
// Output:
// rax - number of processed bytes
address StubGenerator::generate_galoisCounterMode_AESCrypt() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_galoisCounterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register in = c_rarg0;
const Register len = c_rarg1;
@@ -317,6 +325,9 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -335,10 +346,16 @@ address StubGenerator::generate_galoisCounterMode_AESCrypt() {
// Output:
// rax - number of processed bytes
address StubGenerator::generate_avx2_galoisCounterMode_AESCrypt() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_galoisCounterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register in = c_rarg0;
const Register len = c_rarg1;
@@ -402,15 +419,24 @@ address StubGenerator::generate_avx2_galoisCounterMode_AESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// Vector AES Counter implementation
address StubGenerator::generate_counterMode_VectorAESCrypt() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -469,6 +495,9 @@ address StubGenerator::generate_counterMode_VectorAESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -496,10 +525,16 @@ address StubGenerator::generate_counterMode_VectorAESCrypt() {
//
address StubGenerator::generate_counterMode_AESCrypt_Parallel() {
assert(UseAES, "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -779,15 +814,24 @@ address StubGenerator::generate_counterMode_AESCrypt_Parallel() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_cipherBlockChaining_decryptVectorAESCrypt() {
assert(VM_Version::supports_avx512_vaes(), "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1055,6 +1099,9 @@ address StubGenerator::generate_cipherBlockChaining_decryptVectorAESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1067,11 +1114,17 @@ address StubGenerator::generate_cipherBlockChaining_decryptVectorAESCrypt() {
//
address StubGenerator::generate_aescrypt_encryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_aescrypt_encryptBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label L_doLast;
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1150,6 +1203,9 @@ address StubGenerator::generate_aescrypt_encryptBlock() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1162,11 +1218,17 @@ address StubGenerator::generate_aescrypt_encryptBlock() {
//
address StubGenerator::generate_aescrypt_decryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_aescrypt_decryptBlock_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
Label L_doLast;
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1246,6 +1308,9 @@ address StubGenerator::generate_aescrypt_decryptBlock() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1264,10 +1329,16 @@ address StubGenerator::generate_aescrypt_decryptBlock() {
//
address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_encryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
const Register from = c_rarg0; // source array address
@@ -1396,9 +1467,213 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
__ jcc(Assembler::notEqual, L_loopTop_256);
__ jmp(L_exit);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
+// This is a version of ECB/AES Encrypt/Decrypt which does 4 blocks in a loop
+// at a time to hide instruction latency.
+//
+// For encryption (is_encrypt=true):
+// pxor key[0], aesenc key[1..rounds-1], aesenclast key[rounds]
+// For decryption (is_encrypt=false):
+// pxor key[1], aesdec key[2..rounds], aesdeclast key[0]
+//
+// Arguments:
+//
+// Inputs:
+// c_rarg0 - source byte array address
+// c_rarg1 - destination byte array address
+// c_rarg2 - session key (Ke/Kd) in little endian int array
+// c_rarg3 - input length (must be multiple of blocksize 16)
+//
+// Output:
+// rax - input length
+//
+address StubGenerator::generate_electronicCodeBook_AESCrypt_Parallel(bool is_encrypt) {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ StubId stub_id = is_encrypt ? StubId::stubgen_electronicCodeBook_encryptAESCrypt_id
+ : StubId::stubgen_electronicCodeBook_decryptAESCrypt_id;
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, stub_id);
+ start = __ pc();
+
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register len_reg = c_rarg3; // src len (must be multiple of blocksize 16)
+ const Register pos = rax;
+ const Register keylen = r11;
+
+ const XMMRegister xmm_result0 = xmm0;
+ const XMMRegister xmm_result1 = xmm1;
+ const XMMRegister xmm_result2 = xmm2;
+ const XMMRegister xmm_result3 = xmm3;
+ const XMMRegister xmm_key_shuf_mask = xmm4;
+ const XMMRegister xmm_key_tmp = xmm5;
+ // keys 0-9 pre-loaded into xmm6-xmm15
+ const int XMM_REG_NUM_KEY_FIRST = 6;
+ const int XMM_REG_NUM_KEY_LAST = 15;
+ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+ // for key_128, key_192, key_256
+ const int ROUNDS[3] = {10, 12, 14};
+
+ Label L_exit;
+ Label L_loop4[3], L_single[3], L_done[3];
+
+#ifdef DoFour
+#undef DoFour
+#endif
+#ifdef DoOne
+#undef DoOne
+#endif
+
+#define DoFour(opc, reg) \
+__ opc(xmm_result0, reg); \
+__ opc(xmm_result1, reg); \
+__ opc(xmm_result2, reg); \
+__ opc(xmm_result3, reg);
+
+#define DoOne(opc, reg) \
+__ opc(xmm_result0, reg);
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ __ push(len_reg); // save original length for return value
+
+ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), r10 /*rscratch*/);
+ // load up xmm regs 6 thru 15 with keys 0x00 - 0x90
+ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++, offset += 0x10) {
+ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+ }
+ __ xorptr(pos, pos);
+
+ // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
+ __ cmpl(keylen, 52);
+ __ jcc(Assembler::equal, L_loop4[1]);
+ __ cmpl(keylen, 60);
+ __ jcc(Assembler::equal, L_loop4[2]);
+
+ // k == 0: generate code for key_128
+ // k == 1: generate code for key_192
+ // k == 2: generate code for key_256
+ for (int k = 0; k < 3; ++k) {
+ __ align(OptoLoopAlignment);
+ __ BIND(L_loop4[k]);
+ __ cmpptr(len_reg, 4 * AESBlockSize);
+ __ jcc(Assembler::less, L_single[k]);
+
+ __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
+ __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
+ __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
+ __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
+
+ if (is_encrypt) {
+ DoFour(pxor, xmm_key_first);
+ for (int rnum = 1; rnum < 10; rnum++) {
+ DoFour(aesenc, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
+ }
+ for (int i = 10; i < ROUNDS[k]; i++) {
+ load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
+ DoFour(aesenc, xmm_key_tmp);
+ }
+ load_key(xmm_key_tmp, key, ROUNDS[k] * 0x10, xmm_key_shuf_mask);
+ DoFour(aesenclast, xmm_key_tmp);
+ } else {
+ DoFour(pxor, as_XMMRegister(1 + XMM_REG_NUM_KEY_FIRST));
+ for (int rnum = 2; rnum < 10; rnum++) {
+ DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
+ }
+ for (int i = 10; i <= ROUNDS[k]; i++) {
+ load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
+ DoFour(aesdec, xmm_key_tmp);
+ }
+ DoFour(aesdeclast, xmm_key_first);
+ }
+
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
+ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
+ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
+ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
+
+ __ addptr(pos, 4 * AESBlockSize);
+ __ subptr(len_reg, 4 * AESBlockSize);
+ __ jmp(L_loop4[k]);
+
+ __ align(OptoLoopAlignment);
+ __ BIND(L_single[k]);
+ __ cmpptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::less, L_done[k]);
+
+ __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0));
+
+ if (is_encrypt) {
+ DoOne(pxor, xmm_key_first);
+ for (int rnum = 1; rnum < 10; rnum++) {
+ DoOne(aesenc, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
+ }
+ for (int i = 10; i < ROUNDS[k]; i++) {
+ load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
+ DoOne(aesenc, xmm_key_tmp);
+ }
+ load_key(xmm_key_tmp, key, ROUNDS[k] * 0x10, xmm_key_shuf_mask);
+ DoOne(aesenclast, xmm_key_tmp);
+ } else {
+ DoOne(pxor, as_XMMRegister(1 + XMM_REG_NUM_KEY_FIRST));
+ for (int rnum = 2; rnum < 10; rnum++) {
+ DoOne(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
+ }
+ for (int i = 10; i <= ROUNDS[k]; i++) {
+ load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
+ DoOne(aesdec, xmm_key_tmp);
+ }
+ DoOne(aesdeclast, xmm_key_first);
+ }
+
+ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0);
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_single[k]);
+
+ __ BIND(L_done[k]);
+ if (k < 2) __ jmp(L_exit);
+ } //for key_128/192/256
+
+ __ BIND(L_exit);
+ // Clear all XMM registers holding sensitive key material before returning
+ __ pxor(xmm_key_tmp, xmm_key_tmp);
+ for (int rnum = XMM_REG_NUM_KEY_FIRST; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+ __ pxor(as_XMMRegister(rnum), as_XMMRegister(rnum));
+ }
+ __ pop(rax);
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
+ return start;
+
+#undef DoFour
+#undef DoOne
+}
+
+address StubGenerator::generate_electronicCodeBook_encryptAESCrypt_Parallel() {
+ return generate_electronicCodeBook_AESCrypt_Parallel(true);
+}
+
+address StubGenerator::generate_electronicCodeBook_decryptAESCrypt_Parallel() {
+ return generate_electronicCodeBook_AESCrypt_Parallel(false);
+}
+
// This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
// to hide instruction latency
//
@@ -1416,10 +1691,16 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
//
address StubGenerator::generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
assert(UseAES, "need AES instructions and misaligned SSE support");
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1493,7 +1774,7 @@ address StubGenerator::generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
__ opc(xmm_result0, src_reg); \
__ opc(xmm_result1, src_reg); \
__ opc(xmm_result2, src_reg); \
-__ opc(xmm_result3, src_reg); \
+__ opc(xmm_result3, src_reg);
for (int k = 0; k < 3; ++k) {
__ BIND(L_multiBlock_loopTopHead[k]);
@@ -1655,14 +1936,23 @@ __ opc(xmm_result3, src_reg); \
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_electronicCodeBook_encryptAESCrypt() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_electronicCodeBook_encryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1676,14 +1966,23 @@ address StubGenerator::generate_electronicCodeBook_encryptAESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
address StubGenerator::generate_electronicCodeBook_decryptAESCrypt() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_electronicCodeBook_decryptAESCrypt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -1697,6 +1996,9 @@ address StubGenerator::generate_electronicCodeBook_decryptAESCrypt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -4096,3 +4398,27 @@ void StubGenerator::aesgcm_avx2(Register in, Register len, Register ct, Register
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_aes(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ ADD(key_shuffle_mask_addr());
+ ADD(counter_shuffle_mask_addr());
+ ADD(counter_mask_linc0_addr());
+ ADD(counter_mask_linc1_addr());
+ ADD(counter_mask_linc1f_addr());
+ ADD(counter_mask_linc2_addr());
+ ADD(counter_mask_linc2f_addr());
+ ADD(counter_mask_linc4_addr());
+ ADD(counter_mask_linc8_addr());
+ ADD(counter_mask_linc16_addr());
+ ADD(counter_mask_linc32_addr());
+ ADD(counter_mask_ones_addr());
+ ADD(ghash_polynomial_reduction_addr());
+ ADD(ghash_polynomial_two_one_addr());
+ ADD(counter_mask_addbe_4444_addr());
+ ADD(counter_mask_addbe_1234_addr());
+ ADD(counter_mask_add_1234_addr());
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
index d53fafafdb4..5530e5325de 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
@@ -511,12 +511,12 @@ void StubGenerator::copy_bytes_backward(Register from, Register dest,
// - If target supports AVX3 features (BW+VL+F) then implementation uses 32 byte vectors (YMMs)
// for both special cases (various small block sizes) and aligned copy loop. This is the
// default configuration.
-// - If copy length is above AVX3Threshold, then implementation use 64 byte vectors (ZMMs)
+// - If copy length is above CopyAVX3Threshold, then implementation use 64 byte vectors (ZMMs)
// for main copy loop (and subsequent tail) since bulk of the cycles will be consumed in it.
// - If user forces MaxVectorSize=32 then above 4096 bytes its seen that REP MOVs shows a
// better performance for disjoint copies. For conjoint/backward copy vector based
// copy performs better.
-// - If user sets AVX3Threshold=0, then special cases for small blocks sizes operate over
+// - If user sets CopyAVX3Threshold=0, then special cases for small blocks sizes operate over
// 64 byte vector registers (ZMMs).
// Inputs:
@@ -570,13 +570,47 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
default:
ShouldNotReachHere();
}
+ GrowableArray entries;
+ GrowableArray extras;
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ // The stub employs one unsafe handler region by default but has two
+ // when MaxVectorSize == 64 So we may expect 0, 3 or 6 extras.
+ int handlers_count = (MaxVectorSize == 64 ? 2 : 1);
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_extra_count = (add_handlers ? handlers_count : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/1/2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_extra_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 1/2 x UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, handlers_count);
+ }
+#if INCLUDE_ZGC
+ // register addresses at which ZGC does colour patching
+ if (add_relocs) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
- int avx3threshold = VM_Version::avx3_threshold();
- bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
+ bool use64byteVector = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
const int large_threshold = 2621440; // 2.5 MB
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
@@ -596,6 +630,7 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -621,7 +656,7 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
int threshold[] = { 4096, 2048, 1024, 512};
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// 'from', 'to' and 'count' are now valid
// temp1 holds remaining count and temp4 holds running count used to compute
@@ -647,7 +682,7 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
__ cmpq(temp2, large_threshold);
__ jcc(Assembler::greaterEqual, L_copy_large);
}
- if (avx3threshold != 0) {
+ if (CopyAVX3Threshold != 0) {
__ cmpq(count, threshold[shift]);
if (MaxVectorSize == 64) {
// Copy using 64 byte vectors.
@@ -659,7 +694,7 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
}
}
- if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
+ if ((MaxVectorSize < 64) || (CopyAVX3Threshold != 0)) {
// Partial copy to make dst address 32 byte aligned.
__ movq(temp2, to);
__ andq(temp2, 31);
@@ -790,10 +825,28 @@ address StubGenerator::generate_disjoint_copy_avx3_masked(StubId stub_id, addres
if (MaxVectorSize == 64) {
__ BIND(L_copy_large);
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, false, ucme_exit_pc);
+ UnsafeMemoryAccessMark umam(this, add_handlers, false, ucme_exit_pc);
arraycopy_avx3_large(to, from, temp1, temp2, temp3, temp4, count, xmm1, xmm2, xmm3, xmm4, shift);
__ jmp(L_finish);
}
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_extra_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -908,13 +961,43 @@ address StubGenerator::generate_conjoint_copy_avx3_masked(StubId stub_id, addres
default:
ShouldNotReachHere();
}
-
+ GrowableArray entries;
+ GrowableArray extras;
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (add_handlers ? 1 : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/1 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 1 x UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 1);
+ }
+#if INCLUDE_ZGC
+ if (add_relocs) {
+ // register addresses at which ZGC does colour patching
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
- int avx3threshold = VM_Version::avx3_threshold();
- bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
+ bool use64byteVector = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
@@ -933,6 +1016,7 @@ address StubGenerator::generate_conjoint_copy_avx3_masked(StubId stub_id, addres
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -959,7 +1043,7 @@ address StubGenerator::generate_conjoint_copy_avx3_masked(StubId stub_id, addres
int threshold[] = { 4096, 2048, 1024, 512};
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// 'from', 'to' and 'count' are now valid
// temp1 holds remaining count.
@@ -979,12 +1063,12 @@ address StubGenerator::generate_conjoint_copy_avx3_masked(StubId stub_id, addres
// PRE-MAIN-POST loop for aligned copy.
__ BIND(L_entry);
- if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
+ if ((MaxVectorSize > 32) && (CopyAVX3Threshold != 0)) {
__ cmpq(temp1, threshold[shift]);
__ jcc(Assembler::greaterEqual, L_pre_main_post_64);
}
- if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
+ if ((MaxVectorSize < 64) || (CopyAVX3Threshold != 0)) {
// Partial copy to make dst address 32 byte aligned.
__ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
__ andq(temp2, 31);
@@ -1073,6 +1157,23 @@ address StubGenerator::generate_conjoint_copy_avx3_masked(StubId stub_id, addres
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -1199,7 +1300,7 @@ void StubGenerator::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegi
bool use64byteVector, Label& L_entry, Label& L_exit) {
Label L_entry_64, L_entry_96, L_entry_128;
Label L_entry_160, L_entry_192;
- bool avx3 = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
+ bool avx3 = (MaxVectorSize > 32) && (CopyAVX3Threshold == 0);
int size_mat[][6] = {
/* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
@@ -1387,9 +1488,29 @@ address StubGenerator::generate_disjoint_byte_copy(address* entry) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
#endif
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (2 * UnsafeMemoryAccess::COLUMN_COUNT); // 2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ address start = load_archive_data(stub_id, entries_ptr, &extras);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
@@ -1409,6 +1530,7 @@ address StubGenerator::generate_disjoint_byte_copy(address* entry) {
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1478,6 +1600,17 @@ __ BIND(L_exit);
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, T_BYTE);
__ jmp(L_copy_4_bytes);
}
+
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, &extras);
+
return start;
}
@@ -1505,9 +1638,29 @@ address StubGenerator::generate_conjoint_byte_copy(address nooverlap_target, add
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
#endif
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (2 * UnsafeMemoryAccess::COLUMN_COUNT); // 2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ address start = load_archive_data(stub_id, entries_ptr, &extras);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY;
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
@@ -1522,6 +1675,7 @@ address StubGenerator::generate_conjoint_byte_copy(address nooverlap_target, add
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1588,6 +1742,16 @@ address StubGenerator::generate_conjoint_byte_copy(address nooverlap_target, add
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, &extras);
+
return start;
}
@@ -1618,10 +1782,29 @@ address StubGenerator::generate_disjoint_short_copy(address *entry) {
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
#endif
-
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (2 * UnsafeMemoryAccess::COLUMN_COUNT); // 2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ address start = load_archive_data(stub_id, entries_ptr, &extras);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
@@ -1640,6 +1823,7 @@ address StubGenerator::generate_disjoint_short_copy(address *entry) {
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1703,6 +1887,16 @@ __ BIND(L_exit);
__ jmp(L_copy_4_bytes);
}
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, &extras);
+
return start;
}
@@ -1710,7 +1904,6 @@ __ BIND(L_exit);
address StubGenerator::generate_fill(StubId stub_id) {
BasicType t;
bool aligned;
-
switch (stub_id) {
case StubId::stubgen_jbyte_fill_id:
t = T_BYTE;
@@ -1739,10 +1932,27 @@ address StubGenerator::generate_fill(StubId stub_id) {
default:
ShouldNotReachHere();
}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ GrowableArray extras;
+ bool add_handlers = ((t == T_BYTE) && !aligned);
+ int handlers_count = (add_handlers ? 1 : 0);
+ int expected_extras_count = (handlers_count * UnsafeMemoryAccess::COLUMN_COUNT); // 0/1 x UMAM {start,end,handler}
+ GrowableArray* extras_ptr = (add_handlers ? &extras : nullptr);
+ address start = load_archive_data(stub_id, nullptr, extras_ptr);
+ if (start != nullptr) {
+ assert(extras.length() == expected_extras_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (add_handlers) {
+ // restore 1 x UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 1);
+ }
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
BLOCK_COMMENT("Entry:");
@@ -1755,7 +1965,7 @@ address StubGenerator::generate_fill(StubId stub_id) {
{
// Add set memory mark to protect against unsafe accesses faulting
- UnsafeMemoryAccessMark umam(this, ((t == T_BYTE) && !aligned), true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
__ generate_fill(t, aligned, to, value, r11, rax, xmm0);
}
@@ -1763,6 +1973,15 @@ address StubGenerator::generate_fill(StubId stub_id) {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_extras_count,
+ "unexpected handler addresses count %d", extras.length());
+ // record the stub entry and end
+ store_archive_data(stub_id, start, end, nullptr, extras_ptr);
+
return start;
}
@@ -1790,10 +2009,29 @@ address StubGenerator::generate_conjoint_short_copy(address nooverlap_target, ad
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
#endif
-
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (2 * UnsafeMemoryAccess::COLUMN_COUNT); // 2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ address start = load_archive_data(stub_id, entries_ptr, &extras);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY;
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
@@ -1808,6 +2046,7 @@ address StubGenerator::generate_conjoint_short_copy(address nooverlap_target, ad
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1866,6 +2105,16 @@ address StubGenerator::generate_conjoint_short_copy(address nooverlap_target, ad
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, &extras);
+
return start;
}
@@ -1918,10 +2167,42 @@ address StubGenerator::generate_disjoint_int_oop_copy(StubId stub_id, address* e
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
#endif
+ GrowableArray entries;
+ GrowableArray extras;
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (add_handlers ? 2 : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ }
+#if INCLUDE_ZGC
+ // register addresses at which ZGC does colour patching
+ if (add_relocs) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
const Register from = rdi; // source array address
@@ -1939,6 +2220,7 @@ address StubGenerator::generate_disjoint_int_oop_copy(StubId stub_id, address* e
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -1959,7 +2241,7 @@ address StubGenerator::generate_disjoint_int_oop_copy(StubId stub_id, address* e
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// 'from', 'to' and 'count' are now valid
__ movptr(dword_count, count);
__ shrptr(count, 1); // count => qword_count
@@ -1971,20 +2253,20 @@ address StubGenerator::generate_disjoint_int_oop_copy(StubId stub_id, address* e
__ jmp(L_copy_bytes);
// Copy trailing qwords
- __ BIND(L_copy_8_bytes);
+ __ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
// Check for and copy trailing dword
- __ BIND(L_copy_4_bytes);
+ __ BIND(L_copy_4_bytes);
__ testl(dword_count, 1); // Only byte test since the value is 0 or 1
__ jccb(Assembler::zero, L_exit);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
}
-__ BIND(L_exit);
+ __ BIND(L_exit);
address ucme_exit_pc = __ pc();
bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
restore_arg_regs_using_thread();
@@ -1995,12 +2277,30 @@ __ BIND(L_exit);
__ ret(0);
{
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, false, ucme_exit_pc);
+ UnsafeMemoryAccessMark umam(this, add_handlers, false, ucme_exit_pc);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_INT);
__ jmp(L_copy_4_bytes);
}
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -2049,10 +2349,42 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
#endif
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (add_handlers ? 2 : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ }
+#if INCLUDE_ZGC
+ // register addresses at which ZGC does colour patching
+ if (add_relocs) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
@@ -2066,7 +2398,8 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
if (entry != nullptr) {
*entry = __ pc();
- // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ entries.append(*entry);
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -2089,7 +2422,7 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
assert_clean_int(count, rax); // Make sure 'count' is clean int.
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// 'from', 'to' and 'count' are now valid
__ movptr(dword_count, count);
__ shrptr(count, 1); // count => qword_count
@@ -2104,7 +2437,7 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
__ jmp(L_copy_bytes);
// Copy trailing qwords
- __ BIND(L_copy_8_bytes);
+ __ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
__ decrement(qword_count);
@@ -2122,12 +2455,12 @@ address StubGenerator::generate_conjoint_int_oop_copy(StubId stub_id, address no
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_INT);
}
-__ BIND(L_exit);
+ __ BIND(L_exit);
bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
restore_arg_regs_using_thread();
INC_COUNTER_NP(SharedRuntime::_jint_array_copy_ctr, rscratch1); // Update counter after rscratch1 is free
@@ -2136,6 +2469,23 @@ __ BIND(L_exit);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -2182,10 +2532,42 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
return generate_disjoint_copy_avx3_masked(stub_id, entry);
}
#endif
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (add_handlers ? 2 : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ }
+#if INCLUDE_ZGC
+ // register addresses at which ZGC does colour patching
+ if (add_relocs) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
@@ -2203,6 +2585,7 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -2223,7 +2606,7 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// Copy from low to high addresses. Use 'to' as scratch.
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
@@ -2255,7 +2638,7 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_LONG);
}
@@ -2271,6 +2654,23 @@ address StubGenerator::generate_disjoint_long_oop_copy(StubId stub_id, address *
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -2315,10 +2715,42 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
return generate_conjoint_copy_avx3_masked(stub_id, entry, nooverlap_target);
}
#endif
+ bool add_handlers = !is_oop && !aligned;
+ bool add_relocs = UseZGC && is_oop;
+ bool add_extras = add_handlers || add_relocs;
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int expected_handler_count = (add_handlers ? 2 : 0) * UnsafeMemoryAccess::COLUMN_COUNT; // 0/2 x UMAM {start,end,handler}
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (add_extras ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected entry count %d", entries.length());
+ assert(!add_handlers || extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+ if (add_handlers) {
+ // restore 2 UMAM {start,end,handler} addresses from extras
+ register_unsafe_access_handlers(extras, 0, 2);
+ }
+#if INCLUDE_ZGC
+ // register addresses at which ZGC does colour patching
+ if (add_relocs) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
@@ -2331,6 +2763,7 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
@@ -2352,7 +2785,7 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
__ jmp(L_copy_bytes);
@@ -2379,7 +2812,7 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
}
{
// UnsafeMemoryAccess page error: continue after unsafe access
- UnsafeMemoryAccessMark umam(this, !is_oop && !aligned, true);
+ UnsafeMemoryAccessMark umam(this, add_handlers, true);
// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_LONG);
@@ -2395,6 +2828,24 @@ address StubGenerator::generate_conjoint_long_oop_copy(StubId stub_id, address n
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ if (add_handlers) {
+ retrieve_unsafe_access_handlers(start, end, extras);
+ }
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (add_relocs) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -2450,6 +2901,28 @@ address StubGenerator::generate_checkcast_copy(StubId stub_id, address *entry) {
ShouldNotReachHere();
}
+ GrowableArray entries;
+ GrowableArray extras;
+ int expected_entry_count = (entry != nullptr ? 2 : 1);
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == expected_entry_count, "sanity check");
+ GrowableArray* entries_ptr = (entry_count == 1 ? nullptr : &entries);
+ GrowableArray* extras_ptr = (UseZGC ? &extras : nullptr);
+ address start = load_archive_data(stub_id, entries_ptr, extras_ptr);
+ if (start != nullptr) {
+ assert(entries.length() == expected_entry_count - 1,
+ "unexpected addresses count %d", entries.length());
+ if (entry != nullptr) {
+ *entry = entries.at(0);
+ }
+#if INCLUDE_ZGC
+ if (UseZGC) {
+ register_reloc_addresses(extras, 0, extras.length());
+ }
+#endif // INCLUDE_ZGC
+ return start;
+ }
+
Label L_load_element, L_store_element, L_do_card_marks, L_done;
// Input registers (after setup_arg_regs)
@@ -2479,7 +2952,7 @@ address StubGenerator::generate_checkcast_copy(StubId stub_id, address *entry) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -2504,6 +2977,7 @@ address StubGenerator::generate_checkcast_copy(StubId stub_id, address *entry) {
// Caller of this entry point must set up the argument registers.
if (entry != nullptr) {
*entry = __ pc();
+ entries.append(*entry);
BLOCK_COMMENT("Entry:");
}
@@ -2638,6 +3112,16 @@ address StubGenerator::generate_checkcast_copy(StubId stub_id, address *entry) {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ address end = __ pc();
+#if INCLUDE_ZGC
+ // retrieve addresses at which ZGC does colour patching
+ if (UseZGC) {
+ retrieve_reloc_addresses(start, end, extras);
+ }
+#endif // INCLUDE_ZGC
+ // record the stub entry and end plus the no_push entry
+ store_archive_data(stub_id, start, end, entries_ptr, extras_ptr);
+
return start;
}
@@ -2657,6 +3141,14 @@ address StubGenerator::generate_checkcast_copy(StubId stub_id, address *entry) {
address StubGenerator::generate_unsafe_copy(address byte_copy_entry, address short_copy_entry,
address int_copy_entry, address long_copy_entry) {
+ StubId stub_id = StubId::stubgen_unsafe_arraycopy_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+
Label L_long_aligned, L_int_aligned, L_short_aligned;
// Input registers (before setup_arg_regs)
@@ -2668,9 +3160,8 @@ address StubGenerator::generate_unsafe_copy(address byte_copy_entry, address sho
const Register bits = rax; // test copy of low bits
__ align(CodeEntryAlignment);
- StubId stub_id = StubId::stubgen_unsafe_arraycopy_id;
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -2702,6 +3193,9 @@ address StubGenerator::generate_unsafe_copy(address byte_copy_entry, address sho
__ shrptr(size, LogBytesPerLong); // size => qword_count
__ jump(RuntimeAddress(long_copy_entry));
+ // record the stub entry and end plus
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -2803,10 +3297,23 @@ static void do_setmemory_atomic_loop(USM_TYPE type, Register dest,
// to an int, short, or byte fill loop.
//
address StubGenerator::generate_unsafe_setmemory(address unsafe_byte_fill) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_unsafe_setmemory_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ // we expect three set of extra unsafememory access handler entries
+ GrowableArray<address> extras;
+ int expected_handler_count = 3 * UnsafeMemoryAccess::COLUMN_COUNT;
+ address start = load_archive_data(stub_id, nullptr, &extras);
+ if (start != nullptr) {
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+ register_unsafe_access_handlers(extras, 0, 3);
+ return start;
+ }
+
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert(unsafe_byte_fill != nullptr, "Invalid call");
@@ -2896,6 +3403,16 @@ address StubGenerator::generate_unsafe_setmemory(address unsafe_byte_fill) {
__ jump(RuntimeAddress(unsafe_byte_fill));
}
+ // retrieve the registered handler addresses
+ address end = __ pc();
+ retrieve_unsafe_access_handlers(start, end, extras);
+ assert(extras.length() == expected_handler_count,
+ "unexpected handler addresses count %d", extras.length());
+
+ // record the stub entry and end plus the no_push entry and any
+ // extra handler addresses
+ store_archive_data(stub_id, start, end, nullptr, &extras);
+
return start;
}
@@ -2952,7 +3469,15 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh
address int_copy_entry, address oop_copy_entry,
address long_copy_entry, address checkcast_copy_entry) {
- Label L_failed, L_failed_0, L_objArray;
+ StubId stub_id = StubId::stubgen_generic_arraycopy_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+
+ Label L_failed, L_failed_0, L_skip_failed_0, L_objArray;
Label L_copy_shorts, L_copy_ints, L_copy_longs;
// Input registers
@@ -2968,22 +3493,9 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh
const Register rklass_tmp = rdi; // load_klass
#endif
- { int modulus = CodeEntryAlignment;
- int target = modulus - 5; // 5 = sizeof jmp(L_failed)
- int advance = target - (__ offset() % modulus);
- if (advance < 0) advance += modulus;
- if (advance > 0) __ nop(advance);
- }
- StubId stub_id = StubId::stubgen_generic_arraycopy_id;
StubCodeMark mark(this, stub_id);
-
- // Short-hop target to L_failed. Makes for denser prologue code.
- __ BIND(L_failed_0);
- __ jmp(L_failed);
- assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
-
__ align(CodeEntryAlignment);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
@@ -3024,7 +3536,8 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh
// if (dst_pos < 0) return -1;
__ testl(dst_pos, dst_pos); // dst_pos (32-bits)
size_t j4off = __ offset();
- __ jccb(Assembler::negative, L_failed_0);
+ // skip over the failure trampoline
+ __ jccb(Assembler::positive, L_skip_failed_0);
// The first four tests are very dense code,
// but not quite dense enough to put four
@@ -3034,6 +3547,13 @@ address StubGenerator::generate_generic_copy(address byte_copy_entry, address sh
// Make sure of this.
guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
+ // Short-hop target to L_failed. Makes for denser prologue code.
+ __ BIND(L_failed_0);
+ __ jmp(L_failed);
+
+ // continue here if first 4 checks pass
+ __ bind(L_skip_failed_0);
+
// registers used as temp
const Register r11_length = r11; // elements count to copy
const Register r10_src_klass = r10; // array klass
@@ -3256,6 +3776,9 @@ __ BIND(L_failed);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp
index 73330dedc0f..4c647b7d9dc 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp
@@ -191,8 +191,14 @@ ATTRIBUTE_ALIGNED(4) static const juint _D_table[] =
address StubGenerator::generate_libmCbrt() {
StubId stub_id = StubId::stubgen_dcbrt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1;
Label B1_1, B1_2, B1_4;
@@ -335,7 +341,34 @@ address StubGenerator::generate_libmCbrt() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_cbrt(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ ADD(_ABS_MASK);
+ ADD(_SIG_MASK);
+ ADD(_EXP_MASK);
+ ADD(_EXP_MSK2);
+ ADD(_EXP_MSK3);
+ ADD(_SCALE63);
+ ADD(_ZERON);
+ ADD(_INF);
+ ADD(_NEG_INF);
+ address coeff_table = (address)_coeff_table;
+ ADD(coeff_table);
+ ADD(coeff_table + 16);
+ ADD(coeff_table + 32);
+ ADD(coeff_table + 48);
+ ADD(_rcp_table);
+ ADD(_cbrt_table);
+ ADD(_D_table);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_chacha.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_chacha.cpp
index 7afaf34e031..1fa51cd2f18 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_chacha.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_chacha.cpp
@@ -111,10 +111,16 @@ void StubGenerator::generate_chacha_stubs() {
/* The 2-block AVX/AVX2-enabled ChaCha20 block function implementation */
address StubGenerator::generate_chacha20Block_avx() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_chacha20Block_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_twoRounds;
const Register state = c_rarg0;
@@ -295,15 +301,25 @@ address StubGenerator::generate_chacha20Block_avx() {
}
__ leave();
__ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
/* The 4-block AVX512-enabled ChaCha20 block function implementation */
address StubGenerator::generate_chacha20Block_avx512() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_chacha20Block_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_twoRounds;
const Register state = c_rarg0;
@@ -466,6 +482,10 @@ address StubGenerator::generate_chacha20Block_avx512() {
__ vzeroupper();
__ leave();
__ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -584,3 +604,13 @@ bVec,
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_chacha(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ ADD(CC20_COUNTER_ADD_AVX);
+ ADD(CC20_COUNTER_ADD_AVX512);
+ ADD(CC20_LROT_CONSTS);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_constants.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_constants.cpp
index 93fa7e650db..19e1ca680b3 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_constants.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_constants.cpp
@@ -233,3 +233,30 @@ ATTRIBUTE_ALIGNED(16) static const juint _Ctable[] = {
};
address StubGenerator::Ctable = (address)_Ctable;
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_constants(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ ADD(_ONE);
+ ADD(_ONEHALF);
+ ADD(_SIGN_MASK);
+ ADD(_TWO_POW_55);
+ ADD(_TWO_POW_M55);
+ ADD(_SHIFTER);
+ ADD(_ZERO);
+ ADD(_SC_1);
+ ADD(_SC_2);
+ ADD(_SC_3);
+ ADD(_SC_4);
+ // Use value which was already cast to (address): StubGenerator::PI_4;
+ ADD(PI_4);
+ ADD(PI_4 + 8);
+ ADD(_PI32INV);
+ ADD(_NEG_ZERO);
+ ADD(_P_1);
+ ADD(_P_2);
+ ADD(_P_3);
+ ADD(_PI_INV_TABLE);
+ ADD(_Ctable);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cos.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cos.cpp
index 8cb6ead21fd..8dedd50cd97 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_cos.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cos.cpp
@@ -174,8 +174,14 @@
address StubGenerator::generate_libmCos() {
StubId stub_id = StubId::stubgen_dcos_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
@@ -619,6 +625,9 @@ address StubGenerator::generate_libmCos() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
index b9590939468..de8f52a3c05 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_dilithium.cpp
@@ -401,10 +401,16 @@ static void storeXmms(Register destination, int offset, const XMMRegister xmmReg
//
static address generate_dilithiumAlmostNtt_avx(StubGenerator *stubgen,
int vector_len, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -646,6 +652,9 @@ static address generate_dilithiumAlmostNtt_avx(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -657,10 +666,16 @@ static address generate_dilithiumAlmostNtt_avx(StubGenerator *stubgen,
// zetas (int[128*8]) = c_rarg1
static address generate_dilithiumAlmostInverseNtt_avx(StubGenerator *stubgen,
int vector_len, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumAlmostInverseNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -886,6 +901,9 @@ static address generate_dilithiumAlmostInverseNtt_avx(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -900,10 +918,16 @@ static address generate_dilithiumAlmostInverseNtt_avx(StubGenerator *stubgen,
static address generate_dilithiumNttMult_avx(StubGenerator *stubgen,
int vector_len, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumNttMult_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
Label L_loop;
@@ -972,6 +996,9 @@ static address generate_dilithiumNttMult_avx(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -984,10 +1011,16 @@ static address generate_dilithiumNttMult_avx(StubGenerator *stubgen,
static address generate_dilithiumMontMulByConstant_avx(StubGenerator *stubgen,
int vector_len, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumMontMulByConstant_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
Label L_loop;
@@ -1059,6 +1092,9 @@ static address generate_dilithiumMontMulByConstant_avx(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1073,10 +1109,16 @@ static address generate_dilithiumMontMulByConstant_avx(StubGenerator *stubgen,
// multiplier (int) = c_rarg4
static address generate_dilithiumDecomposePoly_avx(StubGenerator *stubgen,
int vector_len, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_dilithiumDecomposePoly_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
Label L_loop;
@@ -1318,6 +1360,9 @@ static address generate_dilithiumDecomposePoly_avx(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1340,3 +1385,21 @@ void StubGenerator::generate_dilithium_stubs() {
generate_dilithiumDecomposePoly_avx(this, vector_len, _masm);
}
}
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_dilithium(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ // use accessors to correctly identify the relevant addresses
+ ADD(unshufflePermsAddr(0));
+ ADD(unshufflePermsAddr(1));
+ ADD(unshufflePermsAddr(2));
+ ADD(unshufflePermsAddr(3));
+ ADD(unshufflePermsAddr(4));
+ ADD(unshufflePermsAddr(5));
+ ADD(dilithiumAvx512ConstsAddr(montQInvModRIdx));
+ ADD(dilithiumAvx512ConstsAddr(dilithium_qIdx));
+ ADD(dilithiumAvx512ConstsAddr(montRSquareModQIdx));
+ ADD(dilithiumAvx512ConstsAddr(barrettAddendIdx));
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_exp.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_exp.cpp
index 5130fd2c9d2..3c8babcbecf 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_exp.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_exp.cpp
@@ -166,8 +166,14 @@ ATTRIBUTE_ALIGNED(4) static const juint _INF[] =
address StubGenerator::generate_libmExp() {
StubId stub_id = StubId::stubgen_dexp_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
@@ -381,7 +387,32 @@ address StubGenerator::generate_libmExp() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_exp(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address cv = (address)_cv;
+ ADD(cv);
+ ADD(cv + 16);
+ ADD(cv + 32);
+ ADD(cv + 48);
+ ADD(cv + 64);
+ ADD(cv + 80);
+ ADD(_mmask);
+ ADD(_bias);
+ ADD(_Tbl_addr);
+ ADD(_ALLONES);
+ ADD(_ebias);
+ ADD(_XMAX);
+ ADD(_XMIN);
+ ADD(_INF);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp
index b1eaa4b8031..f53985a13b7 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp
@@ -72,13 +72,19 @@ ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_e307[] = {
};
address StubGenerator::generate_libmFmod() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_fmod_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
- if (VM_Version::supports_avx512vlbwdq()) { // AVX512 version
+ if (VM_Version::supports_avx512vlbwdq() && VM_Version::supports_fma()) { // AVX512 version
// Source used to generate the AVX512 fmod assembly below:
//
@@ -521,7 +527,22 @@ address StubGenerator::generate_libmFmod() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_fmod(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(CONST_NaN);
+ ADD(CONST_1p260);
+ ADD(CONST_MAX);
+ ADD(CONST_INF);
+ ADD(CONST_e307);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
index 6f05b1ab5e6..9ebab07589e 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp
@@ -80,11 +80,17 @@ void StubGenerator::generate_ghash_stubs() {
// Single and multi-block ghash operations.
address StubGenerator::generate_ghash_processBlocks() {
- __ align(CodeEntryAlignment);
- Label L_ghash_loop, L_exit;
StubId stub_id = StubId::stubgen_ghash_processBlocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ Label L_ghash_loop, L_exit;
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register state = c_rarg0;
const Register subkeyH = c_rarg1;
@@ -211,17 +217,25 @@ address StubGenerator::generate_ghash_processBlocks() {
__ leave();
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
// Ghash single and multi block operations using AVX instructions
address StubGenerator::generate_avx_ghash_processBlocks() {
- __ align(CodeEntryAlignment);
-
StubId stub_id = StubId::stubgen_ghash_processBlocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
// arguments
const Register state = c_rarg0;
@@ -237,6 +251,9 @@ address StubGenerator::generate_avx_ghash_processBlocks() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -538,3 +555,14 @@ void StubGenerator::generateHtbl_eight_blocks(Register htbl) {
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_ghash(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(GHASH_SHUFFLE_MASK);
+ ADD(GHASH_LONG_SWAP_MASK);
+ ADD(GHASH_BYTE_SWAP_MASK);
+ ADD(GHASH_POLYNOMIAL);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
index 7d5dee6a5df..347a9b936a8 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
@@ -400,10 +400,16 @@ static int xmm29_29[] = {29, 29, 29, 29};
// ntt_zetas (short[256]) = c_rarg1
address generate_kyberNtt_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -487,6 +493,9 @@ address generate_kyberNtt_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -496,11 +505,16 @@ address generate_kyberNtt_avx512(StubGenerator *stubgen,
// ntt_zetas (short[256]) = c_rarg1
address generate_kyberInverseNtt_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberInverseNtt_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -610,6 +624,9 @@ address generate_kyberInverseNtt_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -621,11 +638,16 @@ address generate_kyberInverseNtt_avx512(StubGenerator *stubgen,
// zetas (short[128]) = c_rarg3
address generate_kyberNttMult_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberNttMult_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -731,6 +753,9 @@ address generate_kyberNttMult_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -741,11 +766,16 @@ address generate_kyberNttMult_avx512(StubGenerator *stubgen,
// b (short[256]) = c_rarg2
address generate_kyberAddPoly_2_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberAddPoly_2_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -776,6 +806,9 @@ address generate_kyberAddPoly_2_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -787,11 +820,16 @@ address generate_kyberAddPoly_2_avx512(StubGenerator *stubgen,
// c (short[256]) = c_rarg3
address generate_kyberAddPoly_3_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberAddPoly_3_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register result = c_rarg0;
@@ -830,6 +868,9 @@ address generate_kyberAddPoly_3_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -841,11 +882,16 @@ address generate_kyberAddPoly_3_avx512(StubGenerator *stubgen,
// parsedLength (int) = c_rarg3
address generate_kyber12To16_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyber12To16_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register condensed = c_rarg0;
@@ -984,6 +1030,9 @@ address generate_kyber12To16_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -993,11 +1042,16 @@ address generate_kyber12To16_avx512(StubGenerator *stubgen,
// coeffs (short[256]) = c_rarg0
address generate_kyberBarrettReduce_avx512(StubGenerator *stubgen,
MacroAssembler *_masm) {
-
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_kyberBarrettReduce_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
const Register coeffs = c_rarg0;
@@ -1021,6 +1075,9 @@ address generate_kyberBarrettReduce_avx512(StubGenerator *stubgen,
__ mov64(rax, 0); // return 0
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1038,3 +1095,24 @@ void StubGenerator::generate_kyber_stubs() {
}
}
}
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_kyber(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr))
+ // use accessors to correctly identify the relevant addresses
+ ADD(kyberAvx512NttPermsAddr());
+ ADD(kyberAvx512InverseNttPermsAddr());
+ ADD(kyberAvx512_nttMultPermsAddr());
+ ADD(kyberAvx512_12To16PermsAddr());
+ ADD(kyberAvx512_12To16DupAddr());
+ ADD(kyberAvx512_12To16ShiftAddr());
+ ADD(kyberAvx512_12To16AndAddr());
+ ADD(kyberAvx512ConstsAddr(qOffset));
+ ADD(kyberAvx512ConstsAddr(qInvModROffset));
+ ADD(kyberAvx512ConstsAddr(dimHalfInverseOffset));
+ ADD(kyberAvx512ConstsAddr(barretMultiplierOffset));
+ ADD(kyberAvx512ConstsAddr(montRSquareModqOffset));
+ ADD(kyberAvx512ConstsAddr(f00Offset));
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_log.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_log.cpp
index 6b5b4d704e3..07683a51e3d 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_log.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_log.cpp
@@ -177,8 +177,14 @@ ATTRIBUTE_ALIGNED(16) static const juint _coeff[] =
address StubGenerator::generate_libmLog() {
StubId stub_id = StubId::stubgen_dlog_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
@@ -359,6 +365,9 @@ address StubGenerator::generate_libmLog() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -516,8 +525,14 @@ ATTRIBUTE_ALIGNED(16) static const juint _coeff_log10[] =
address StubGenerator::generate_libmLog10() {
StubId stub_id = StubId::stubgen_dlog10_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
@@ -704,7 +719,38 @@ address StubGenerator::generate_libmLog10() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_log(GrowableArray<address>& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address log2 = (address)_log2;
+ address coeff = (address)_coeff;
+ address LOG10_E = (address)_LOG10_E;
+ address log2_log10 = (address)_log2_log10;
+ address coeff_log10 = (address)_coeff_log10;
+
+ ADD(_L_tbl);
+ ADD(log2);
+ ADD(log2 + 8);
+ ADD(coeff);
+ ADD(coeff + 16);
+ ADD(coeff + 32);
+ ADD(_HIGHSIGMASK_log10);
+ ADD(LOG10_E);
+ ADD(LOG10_E + 8);
+ ADD(_L_tbl_log10);
+ ADD(log2_log10);
+ ADD(log2_log10 + 8);
+ ADD(coeff_log10);
+ ADD(coeff_log10 + 16);
+ ADD(coeff_log10 + 32);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
index c80b2d16181..ea7e6d64254 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
@@ -909,10 +909,16 @@ void StubGenerator::poly1305_process_blocks_avx512(
// After execution, input and length will point at remaining (unprocessed) data
// and accumulator will point to the current accumulator value
address StubGenerator::generate_poly1305_processBlocks() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_poly1305_processBlocks_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
// Save all 'SOE' registers
@@ -1028,6 +1034,10 @@ address StubGenerator::generate_poly1305_processBlocks() {
__ leave();
__ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -1695,3 +1705,14 @@ void StubGenerator::poly1305_msg_mul_reduce_vec4_avx2(
__ vpaddq(A1, A1, YTMP2, Assembler::AVX_256bit); //Add medium 42-bit bits from new blocks to accumulator
__ vpaddq(A1, A1, YTMP5, Assembler::AVX_256bit);
}
+#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_poly1305(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(POLY1305_PAD_MSG);
+ ADD(POLY1305_MASK42);
+ ADD(POLY1305_MASK44);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
index c439e0b370f..308a8042993 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
@@ -558,10 +558,16 @@ void montgomeryMultiplyAVX2(const Register aLimbs, const Register bLimbs, const
}
address StubGenerator::generate_intpoly_montgomeryMult_P256() {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_intpoly_montgomeryMult_P256_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
if (VM_Version::supports_avx512ifma() && VM_Version::supports_avx512vlbw()) {
@@ -620,6 +626,10 @@ address StubGenerator::generate_intpoly_montgomeryMult_P256() {
__ leave();
__ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -680,10 +690,16 @@ address StubGenerator::generate_intpoly_assign() {
// P521OrderField: 19 = 8 + 8 + 2 + 1
// Special Cases 5, 10, 14, 16, 19
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_intpoly_assign_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
__ enter();
// Inputs
@@ -762,5 +778,24 @@ address StubGenerator::generate_intpoly_assign() {
__ bind(L_Done);
__ leave();
__ ret(0);
+
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
+#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_poly_mont(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ // use accessors to retrieve all correct addresses
+ ADD(shift_1L());
+ ADD(shift_1R());
+ ADD(p256_mask52());
+ ADD(mask_limb5());
+ ADD(modulus_p256());
+ ADD(modulus_p256(1));
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_pow.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_pow.cpp
index 3c3df7e6ac4..a9a6dc10da4 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_pow.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_pow.cpp
@@ -760,8 +760,14 @@ ATTRIBUTE_ALIGNED(8) static const juint _DOUBLE0DOT5[] = {
address StubGenerator::generate_libmPow() {
StubId stub_id = StubId::stubgen_dpow_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
@@ -1859,7 +1865,45 @@ address StubGenerator::generate_libmPow() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_pow(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address HIGHMASK_Y = (address)_HIGHMASK_Y;
+ address e_coeff = (address)_e_coeff;
+ address coeff_h = (address)_coeff_h;
+ address coeff_pow = (address)_coeff_pow;
+
+ ADD(_HIGHSIGMASK);
+ ADD(_LOG2_E);
+ ADD(HIGHMASK_Y);
+ ADD(HIGHMASK_Y + 8);
+ ADD(_T_exp);
+ ADD(e_coeff);
+ ADD(e_coeff + 16);
+ ADD(e_coeff + 32);
+ ADD(coeff_h);
+ ADD(coeff_h + 8);
+ ADD(_HIGHMASK_LOG_X);
+ ADD(_HALFMASK);
+ ADD(coeff_pow);
+ ADD(coeff_pow + 16);
+ ADD(coeff_pow + 32);
+ ADD(coeff_pow + 48);
+ ADD(coeff_pow + 64);
+ ADD(coeff_pow + 80);
+ ADD(_L_tbl_pow);
+ ADD(_log2_pow);
+ ADD(_DOUBLE2);
+ ADD(_DOUBLE0);
+ ADD(_DOUBLE0DOT5);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
index f9d876f34f3..58f81652a0c 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp
@@ -104,10 +104,15 @@ static address generate_sha3_implCompress(StubId stub_id,
default:
ShouldNotReachHere();
}
-
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
__ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register buf = c_rarg0;
const Register state = c_rarg1;
@@ -316,6 +321,9 @@ static address generate_sha3_implCompress(StubId stub_id,
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -326,10 +334,16 @@ static address generate_sha3_implCompress(StubId stub_id,
// Performs two keccak() computations in parallel. The steps of the
// two computations are executed interleaved.
static address generate_double_keccak(StubGenerator *stubgen, MacroAssembler *_masm) {
- __ align(CodeEntryAlignment);
StubId stub_id = StubId::stubgen_double_keccak_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = stubgen->load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
+ __ align(CodeEntryAlignment);
StubCodeMark mark(stubgen, stub_id);
- address start = __ pc();
+ start = __ pc();
const Register state0 = c_rarg0;
const Register state1 = c_rarg1;
@@ -495,6 +509,9 @@ static address generate_double_keccak(StubGenerator *stubgen, MacroAssembler *_m
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ stubgen->store_archive_data(stub_id, start, __ pc());
+
return start;
}
@@ -508,3 +525,14 @@ void StubGenerator::generate_sha3_stubs() {
generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id, this, _masm);
}
}
+
+#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_sha3(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(round_constsAddr());
+ ADD(permsAndRotsAddr());
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_sin.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_sin.cpp
index 5290e737581..00c759a369b 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_sin.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_sin.cpp
@@ -181,8 +181,14 @@ ATTRIBUTE_ALIGNED(8) static const juint _ALL_ONES[] =
address StubGenerator::generate_libmSin() {
StubId stub_id = StubId::stubgen_dsin_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
@@ -645,7 +651,18 @@ address StubGenerator::generate_libmSin() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_sin(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(_ALL_ONES);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_sinh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_sinh.cpp
index 86e4ac20176..9969866cfc7 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_sinh.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_sinh.cpp
@@ -290,8 +290,14 @@ ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] =
address StubGenerator::generate_libmSinh() {
StubId stub_id = StubId::stubgen_dsinh_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_3_0_2, L_2TAG_PACKET_4_0_2;
Label L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2;
@@ -519,7 +525,36 @@ address StubGenerator::generate_libmSinh() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_sinh(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address L2E = (address)_L2E;
+ address cv = (address)_cv;
+ address pv = (address)_pv;
+
+ ADD(L2E);
+ ADD(L2E + 8);
+ ADD(_HALFMASK);
+ ADD(_Shifter);
+ ADD(cv);
+ ADD(cv + 16);
+ ADD(cv + 32);
+ ADD(cv + 48);
+ ADD(cv + 64);
+ ADD(_T2f);
+ ADD(_T2_neg_f);
+ ADD(pv);
+ ADD(pv + 16);
+ ADD(pv + 32);
+ ADD(_MASK3);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tan.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tan.cpp
index 4f14414652c..9f91b9e8f84 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_tan.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tan.cpp
@@ -456,8 +456,14 @@ ATTRIBUTE_ALIGNED(8) static const juint _QQ_2_tan[] =
address StubGenerator::generate_libmTan() {
StubId stub_id = StubId::stubgen_dtan_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
@@ -1025,7 +1031,35 @@ address StubGenerator::generate_libmTan() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_tan(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address PI_4_tan = (address)_PI_4_tan;
+
+ ADD(_MUL16);
+ ADD(_sign_mask_tan);
+ ADD(_PI32INV_tan);
+ ADD(_P_1_tan);
+ ADD(_P_2_tan);
+ ADD(_P_3_tan);
+ ADD(_Ctable_tan);
+ ADD(_MASK_35_tan);
+ ADD(_Q_11_tan);
+ ADD(_Q_9_tan);
+ ADD(_Q_7_tan);
+ ADD(_Q_5_tan);
+ ADD(_Q_3_tan);
+ ADD(PI_4_tan);
+ ADD(PI_4_tan + 8);
+ ADD(_QQ_2_tan);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
index dce4fbfc455..4f2fe8a460b 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_tanh.cpp
@@ -303,8 +303,14 @@ ATTRIBUTE_ALIGNED(16) static const juint _T2_neg_f[] =
address StubGenerator::generate_libmTanh() {
StubId stub_id = StubId::stubgen_dtanh_id;
+ int entry_count = StubInfo::entry_count(stub_id);
+ assert(entry_count == 1, "sanity check");
+ address start = load_archive_data(stub_id);
+ if (start != nullptr) {
+ return start;
+ }
StubCodeMark mark(this, stub_id);
- address start = __ pc();
+ start = __ pc();
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1;
@@ -495,7 +501,36 @@ address StubGenerator::generate_libmTanh() {
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
+ // record the stub entry and end
+ store_archive_data(stub_id, start, __ pc());
+
return start;
}
#undef __
+
+#if INCLUDE_CDS
+void StubGenerator::init_AOTAddressTable_tanh(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ address L2E = (address)_L2E;
+ address cv = (address)_cv;
+ address pv = (address)_pv;
+
+ ADD(L2E);
+ ADD(L2E + 8);
+ ADD(_HALFMASK);
+ ADD(_ONEMASK);
+ ADD(_TWOMASK);
+ ADD(_Shifter);
+ ADD(cv);
+ ADD(cv + 16);
+ ADD(cv + 32);
+ ADD(_T2_neg_f);
+ ADD(pv);
+ ADD(pv + 16);
+ ADD(pv + 32);
+ ADD(_MASK3);
+ ADD(_RMASK);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
index ee9cea08e64..ce11925dde2 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
@@ -28,6 +28,10 @@
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
#include "crc32c.h"
+#include "stubGenerator_x86_64.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#endif
// Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file.
@@ -40,8 +44,12 @@
#define DEFINE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) = CAST_FROM_FN_PTR(address, init_function);
-STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT)
+#define DEFINE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ address StubRoutines:: arch :: STUB_FIELD_NAME(field_name) [count];
+STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY, DEFINE_ARCH_ENTRY_INIT, DEFINE_ARCH_ENTRY_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_ARRAY
#undef DEFINE_ARCH_ENTRY_INIT
#undef DEFINE_ARCH_ENTRY
@@ -411,3 +419,46 @@ ATTRIBUTE_ALIGNED(64) const julong StubRoutines::x86::_k512_W[] =
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
};
+
+#if INCLUDE_CDS
+
+void StubRoutines::init_AOTAddressTable() {
+ ResourceMark rm;
+ GrowableArray external_addresses;
+ // publish static addresses referred to by main x86 generator and
+ // auxiliary x86 generators
+ StubGenerator::init_AOTAddressTable(external_addresses);
+ // publish external data addresses defined in nested x86 class
+ StubRoutines::x86::init_AOTAddressTable(external_addresses);
+#ifdef COMPILER1
+ LIR_Assembler::init_AOTAddressTable(external_addresses);
+#endif
+ AOTCodeCache::publish_external_addresses(external_addresses);
+}
+
+// publish addresses of external data defined in this file which may
+// be referenced from stub or code
+void StubRoutines::x86::init_AOTAddressTable(GrowableArray& external_addresses) {
+#define ADD(addr) external_addresses.append((address)(addr));
+ ADD(&_mxcsr_std);
+ ADD(&_mxcsr_rz);
+ ADD(crc_by128_masks_addr());
+ ADD(crc_by128_masks_addr() + 16);
+ ADD(crc_by128_masks_addr() + 32);
+ // this is added in generic code
+ // ADD(_crc_table);
+ ADD(crc_by128_masks_avx512_addr());
+ ADD(crc_by128_masks_avx512_addr() + 16);
+ ADD(crc_by128_masks_avx512_addr() + 32);
+ ADD(_crc_table_avx512);
+ ADD(_crc32c_table_avx512);
+ ADD(_shuf_table_crc32_avx512);
+ // n.b. call accessor for this one to ensure the table is generated
+ ADD(crc32c_table_addr());
+ ADD(_arrays_hashcode_powers_of_31);
+ ADD(_k256);
+ ADD(_k256_W);
+ ADD(_k512_W);
+#undef ADD
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
index 3654b644131..7283798888b 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
@@ -55,9 +55,13 @@ class x86 {
#define DECLARE_ARCH_ENTRY_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DECLARE_ARCH_ENTRY(arch, blob_name, stub_name, field_name, getter_name)
-private:
- STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT)
+#define DECLARE_ARCH_ENTRY_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address STUB_FIELD_NAME(field_name) [count] ;
+private:
+ STUBGEN_ARCH_ENTRIES_DO(DECLARE_ARCH_ENTRY, DECLARE_ARCH_ENTRY_INIT, DECLARE_ARCH_ENTRY_ARRAY)
+
+#undef DECLARE_ARCH_ENTRY_ARRAY
#undef DECLARE_ARCH_ENTRY_INIT
#undef DECLARE_ARCH_ENTRY
@@ -70,9 +74,13 @@ private:
#define DEFINE_ARCH_ENTRY_GETTER_INIT(arch, blob_name, stub_name, field_name, getter_name, init_function) \
DEFINE_ARCH_ENTRY_GETTER(arch, blob_name, stub_name, field_name, getter_name)
-public:
- STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT)
+#define DEFINE_ARCH_ENTRY_GETTER_ARRAY(arch, blob_name, stub_name, field_name, getter_name, count) \
+ static address getter_name(int idx) { return STUB_FIELD_NAME(field_name) [idx]; }
+public:
+ STUBGEN_ARCH_ENTRIES_DO(DEFINE_ARCH_ENTRY_GETTER, DEFINE_ARCH_ENTRY_GETTER_INIT, DEFINE_ARCH_ENTRY_GETTER_ARRAY)
+
+#undef DEFINE_ARCH_ENTRY_GETTER_ARRAY
#undef DEFINE_ARCH_ENTRY_GETTER_INIT
#undef DEFINE_ARCH_GETTER_ENTRY
@@ -112,6 +120,8 @@ public:
static address arrays_hashcode_powers_of_31() { return (address)_arrays_hashcode_powers_of_31; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
+
+ static void init_AOTAddressTable(GrowableArray& external_addresses);
};
#endif // CPU_X86_STUBROUTINES_X86_HPP
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 747daefd51d..cf9de40a237 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,7 @@ int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
-#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
+#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME
@@ -143,7 +143,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
- Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
+ Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -468,6 +468,20 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movq(Address(rsi, 0), r16);
__ movq(Address(rsi, 8), r31);
+ //
+ // Query CPUID 0xD.19 for APX XSAVE offset
+ // Extended State Enumeration Sub-leaf 19 (APX)
+ // EAX = size of APX state (should be 128)
+ // EBX = offset in standard XSAVE format
+ //
+ __ movl(rax, 0xD);
+ __ movl(rcx, 19);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
+ __ movl(Address(rsi, 0), rbx);
+
UseAPX = save_apx;
__ bind(vector_save_restore);
//
@@ -921,8 +935,9 @@ void VM_Version::get_processor_features() {
// Check if processor has Intel Ecore
if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
- (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
- _model == 0xCC || _model == 0xDD)) {
+ (supports_hybrid() ||
+ _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
+ _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
}
@@ -943,9 +958,17 @@ void VM_Version::get_processor_features() {
if (UseSSE < 1)
_features.clear_feature(CPU_SSE);
- //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
- if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
- UseAVX = 0;
+ // ZX cpus specific settings
+ if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
+ if (cpu_family() == 7) {
+ if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
+ UseAVX = 1;
+ } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
+ UseAVX = 0;
+ }
+ } else if (cpu_family() == 6) {
+ UseAVX = 0;
+ }
}
// UseSSE is set to the smaller of what hardware supports and what
@@ -1071,15 +1094,36 @@ void VM_Version::get_processor_features() {
}
}
- // Currently APX support is only enabled for targets supporting AVX512VL feature.
- bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
- if (UseAPX && !apx_supported) {
- warning("UseAPX is not supported on this CPU, setting it to false");
+ // Currently APX support is only enabled for targets supporting AVX512VL feature.
+ if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
+ if (FLAG_IS_DEFAULT(UseAPX)) {
+ UseAPX = false; // by default UseAPX is false
+ _features.clear_feature(CPU_APX_F);
+ } else if (!UseAPX) {
+ _features.clear_feature(CPU_APX_F);
+ }
+ } else if (UseAPX) {
+ if (!FLAG_IS_DEFAULT(UseAPX)) {
+ warning("APX is not supported on this CPU, setting it to false)");
+ }
FLAG_SET_DEFAULT(UseAPX, false);
}
- if (!UseAPX) {
- _features.clear_feature(CPU_APX_F);
+ CHECK_CPU_FEATURE(supports_clmul, CLMUL);
+ CHECK_CPU_FEATURE(supports_aes, AES);
+ CHECK_CPU_FEATURE(supports_fma, FMA);
+
+ if (supports_sha() || (supports_avx2() && supports_bmi2())) {
+ if (FLAG_IS_DEFAULT(UseSHA)) {
+ UseSHA = true;
+ } else if (!UseSHA) {
+ _features.clear_feature(CPU_SHA);
+ }
+ } else if (UseSHA) {
+ if (!FLAG_IS_DEFAULT(UseSHA)) {
+ warning("SHA instructions are not available on this CPU");
+ }
+ FLAG_SET_DEFAULT(UseSHA, false);
}
if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
@@ -1129,76 +1173,61 @@ void VM_Version::get_processor_features() {
// Use AES instructions if available.
if (supports_aes()) {
- if (FLAG_IS_DEFAULT(UseAES)) {
- FLAG_SET_DEFAULT(UseAES, true);
+ if (supports_sse3()) {
+ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+ }
+ } else if (UseAESIntrinsics) {
+ // The AES intrinsic stubs require AES instruction support (of course)
+ // but also require sse3 mode or higher for instructions it use.
+ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
- if (!UseAES) {
+ if (!UseAESIntrinsics) {
+ if (UseAESCTRIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+ } else {
+ if (supports_sse4_1()) {
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
+ }
+ } else if (UseAESCTRIntrinsics) {
+ // The AES-CTR intrinsic stubs require AES instruction support (of course)
+ // but also require sse4.1 mode or higher for instructions it use.
+ if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+ }
+ } else {
+ if (!cpu_supports_aes()) {
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("AES intrinsics are not available on this CPU");
+ }
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("AES-CTR intrinsics are not available on this CPU");
+ }
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ } else if (!UseAES) {
if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
}
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
- } else {
- if (UseSSE > 2) {
- if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
- FLAG_SET_DEFAULT(UseAESIntrinsics, true);
- }
- } else {
- // The AES intrinsic stubs require AES instruction support (of course)
- // but also require sse3 mode or higher for instructions it use.
- if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
- warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
- }
- FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
}
-
- // --AES-CTR begins--
- if (!UseAESIntrinsics) {
- if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
- warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
- FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
- }
- } else {
- if (supports_sse4_1()) {
- if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
- FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
- }
- } else {
- // The AES-CTR intrinsic stubs require AES instruction support (of course)
- // but also require sse4.1 mode or higher for instructions it use.
- if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
- warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
- }
- FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
- }
- }
- // --AES-CTR ends--
- }
- } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
- if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
- warning("AES instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseAES, false);
- }
- if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
- warning("AES intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseAESIntrinsics, false);
- }
- if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
- warning("AES-CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
- // Use CLMUL instructions if available.
- if (supports_clmul()) {
- if (FLAG_IS_DEFAULT(UseCLMUL)) {
- UseCLMUL = true;
- }
- } else if (UseCLMUL) {
- if (!FLAG_IS_DEFAULT(UseCLMUL))
- warning("CLMUL instructions not available on this CPU (AVX may also be required)");
- FLAG_SET_DEFAULT(UseCLMUL, false);
- }
-
if (UseCLMUL && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
UseCRC32Intrinsics = true;
@@ -1237,8 +1266,9 @@ void VM_Version::get_processor_features() {
UseGHASHIntrinsics = true;
}
} else if (UseGHASHIntrinsics) {
- if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
+ if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
+ }
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
}
@@ -1248,26 +1278,27 @@ void VM_Version::get_processor_features() {
// based on the VM capabilities whether to use an AVX2 or AVX512-enabled
// version.
if (UseAVX >= 1) {
- if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
- UseChaCha20Intrinsics = true;
- }
+ if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
+ UseChaCha20Intrinsics = true;
+ }
} else if (UseChaCha20Intrinsics) {
- if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
- warning("ChaCha20 intrinsic requires AVX instructions");
- }
- FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
+ if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
+ warning("ChaCha20 intrinsic requires AVX instructions");
+ }
+ FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
}
// Kyber Intrinsics
// Currently we only have them for AVX512
if (supports_evex() && supports_avx512bw()) {
- if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
- UseKyberIntrinsics = true;
- }
- } else
- if (UseKyberIntrinsics) {
- warning("Intrinsics for ML-KEM are not available on this CPU.");
- FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
+ if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
+ UseKyberIntrinsics = true;
+ }
+ } else if (UseKyberIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
+ warning("Intrinsics for ML-KEM are not available on this CPU.");
+ }
+ FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
}
// Dilithium Intrinsics
@@ -1276,8 +1307,10 @@ void VM_Version::get_processor_features() {
UseDilithiumIntrinsics = true;
}
} else if (UseDilithiumIntrinsics) {
+ if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
warning("Intrinsics for ML-DSA are not available on this CPU.");
- FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
+ }
+ FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
}
// Base64 Intrinsics (Check the condition for which the intrinsic will be active)
@@ -1286,39 +1319,24 @@ void VM_Version::get_processor_features() {
UseBASE64Intrinsics = true;
}
} else if (UseBASE64Intrinsics) {
- if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
+ if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
warning("Base64 intrinsic requires EVEX instructions on this CPU");
- FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
- }
-
- if (supports_fma()) {
- if (FLAG_IS_DEFAULT(UseFMA)) {
- UseFMA = true;
}
- } else if (UseFMA) {
- warning("FMA instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseFMA, false);
+ FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
}
if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
UseMD5Intrinsics = true;
}
- if (supports_sha() || (supports_avx2() && supports_bmi2())) {
- if (FLAG_IS_DEFAULT(UseSHA)) {
- UseSHA = true;
- }
- } else if (UseSHA) {
- warning("SHA instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseSHA, false);
- }
-
if (supports_sha() && supports_sse4_1() && UseSHA) {
if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
}
} else if (UseSHA1Intrinsics) {
- warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+ }
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
@@ -1327,7 +1345,9 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
} else if (UseSHA256Intrinsics) {
- warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+ }
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
@@ -1336,21 +1356,21 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
} else if (UseSHA512Intrinsics) {
- warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+ }
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
- if (supports_evex() && supports_avx512bw()) {
- if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
- UseSHA3Intrinsics = true;
- }
+ if (UseSHA && supports_evex() && supports_avx512bw()) {
+ if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
+ }
} else if (UseSHA3Intrinsics) {
+ if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
- }
-
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
- FLAG_SET_DEFAULT(UseSHA, false);
+ }
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
}
#if COMPILER2_OR_JVMCI
@@ -1408,7 +1428,9 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
}
} else if (UsePoly1305Intrinsics) {
- warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
+ if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
+ warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
+ }
FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
}
@@ -1417,7 +1439,9 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
}
} else if (UseIntPolyIntrinsics) {
- warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
+ if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
+ warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
+ }
FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
}
@@ -1481,9 +1505,6 @@ void VM_Version::get_processor_features() {
MaxLoopPad = 11;
}
#endif // COMPILER2
- if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
- UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
- }
if (supports_sse4_2()) { // new ZX cpus
if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
@@ -1501,10 +1522,6 @@ void VM_Version::get_processor_features() {
// Use it on new AMD cpus starting from Opteron.
UseAddressNop = true;
}
- if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
- // Use it on new AMD cpus starting from Opteron.
- UseNewLongLShift = true;
- }
if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
if (supports_sse4a()) {
UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
@@ -1544,10 +1561,6 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
- // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
- if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
- FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
- }
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
@@ -1564,9 +1577,6 @@ void VM_Version::get_processor_features() {
if (cpu_family() >= 0x17) {
// On family >=17h processors use XMM and UnalignedLoadStores
// for Array Copy
- if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
- FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
- }
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
@@ -1613,10 +1623,7 @@ void VM_Version::get_processor_features() {
}
#endif // COMPILER2
- if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
- UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
- }
- if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
+ if (is_intel_modern_cpu()) { // Newest Intel cpus
if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
}
@@ -1640,41 +1647,40 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
-#ifdef COMPILER2
- if (UseAVX > 2) {
- if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
- (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
- ArrayOperationPartialInlineSize != 0 &&
- ArrayOperationPartialInlineSize != 16 &&
- ArrayOperationPartialInlineSize != 32 &&
- ArrayOperationPartialInlineSize != 64)) {
- int inline_size = 0;
- if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
- inline_size = 64;
- } else if (MaxVectorSize >= 32) {
- inline_size = 32;
- } else if (MaxVectorSize >= 16) {
- inline_size = 16;
- }
- if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
- warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
- }
- ArrayOperationPartialInlineSize = inline_size;
- }
-
- if (ArrayOperationPartialInlineSize > MaxVectorSize) {
- ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
- if (ArrayOperationPartialInlineSize) {
- warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
- } else {
- warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
- }
- }
- }
-#endif
}
#ifdef COMPILER2
+ if (UseAVX > 2) {
+ if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
+ (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
+ ArrayOperationPartialInlineSize != 0 &&
+ ArrayOperationPartialInlineSize != 16 &&
+ ArrayOperationPartialInlineSize != 32 &&
+ ArrayOperationPartialInlineSize != 64)) {
+ int inline_size = 0;
+ if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
+ inline_size = 64;
+ } else if (MaxVectorSize >= 32) {
+ inline_size = 32;
+ } else if (MaxVectorSize >= 16) {
+ inline_size = 16;
+ }
+ if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
+ warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
+ }
+ ArrayOperationPartialInlineSize = inline_size;
+ }
+
+ if (ArrayOperationPartialInlineSize > MaxVectorSize) {
+ ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
+ if (ArrayOperationPartialInlineSize) {
+ warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
+ } else {
+ warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
+ }
+ }
+ }
+
if (FLAG_IS_DEFAULT(OptimizeFill)) {
if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
OptimizeFill = false;
@@ -1685,8 +1691,8 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
}
- } else {
- if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+ } else if (UseSSE42Intrinsics) {
+ if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
}
FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
@@ -1696,15 +1702,17 @@ void VM_Version::get_processor_features() {
UseVectorizedMismatchIntrinsic = true;
}
} else if (UseVectorizedMismatchIntrinsic) {
- if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
+ if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
warning("vectorizedMismatch intrinsics are not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
if (UseAVX >= 2) {
FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
} else if (UseVectorizedHashCodeIntrinsic) {
- if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
+ if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
warning("vectorizedHashCode intrinsics are not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
}
@@ -1714,7 +1722,9 @@ void VM_Version::get_processor_features() {
UseCountLeadingZerosInstruction = true;
}
} else if (UseCountLeadingZerosInstruction) {
- warning("lzcnt instruction is not available on this CPU");
+ if (!FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
+ warning("lzcnt instruction is not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
}
@@ -1730,7 +1740,9 @@ void VM_Version::get_processor_features() {
}
}
} else if (UseCountTrailingZerosInstruction) {
- warning("tzcnt instruction is not available on this CPU");
+ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
+ warning("tzcnt instruction is not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
}
@@ -1741,7 +1753,9 @@ void VM_Version::get_processor_features() {
UseBMI1Instructions = true;
}
} else if (UseBMI1Instructions) {
- warning("BMI1 instructions are not available on this CPU (AVX is also required)");
+ if (!FLAG_IS_DEFAULT(UseBMI1Instructions)) {
+ warning("BMI1 instructions are not available on this CPU (AVX is also required)");
+ }
FLAG_SET_DEFAULT(UseBMI1Instructions, false);
}
@@ -1750,7 +1764,9 @@ void VM_Version::get_processor_features() {
UseBMI2Instructions = true;
}
} else if (UseBMI2Instructions) {
- warning("BMI2 instructions are not available on this CPU (AVX is also required)");
+ if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
+ warning("BMI2 instructions are not available on this CPU (AVX is also required)");
+ }
FLAG_SET_DEFAULT(UseBMI2Instructions, false);
}
@@ -1760,7 +1776,9 @@ void VM_Version::get_processor_features() {
UsePopCountInstruction = true;
}
} else if (UsePopCountInstruction) {
- warning("POPCNT instruction is not available on this CPU");
+ if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ warning("POPCNT instruction is not available on this CPU");
+ }
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
}
@@ -1770,7 +1788,9 @@ void VM_Version::get_processor_features() {
UseFastStosb = true;
}
} else if (UseFastStosb) {
- warning("fast-string operations are not available on this CPU");
+ if (!FLAG_IS_DEFAULT(UseFastStosb)) {
+ warning("fast-string operations are not available on this CPU");
+ }
FLAG_SET_DEFAULT(UseFastStosb, false);
}
@@ -1796,7 +1816,9 @@ void VM_Version::get_processor_features() {
UseXMMForObjInit = true;
}
} else if (UseXMMForObjInit) {
- warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
+ if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
+ warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
+ }
FLAG_SET_DEFAULT(UseXMMForObjInit, false);
}
@@ -1837,7 +1859,7 @@ void VM_Version::get_processor_features() {
if (is_intel() && is_intel_server_family() && supports_sse3()) {
if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
- supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+ is_intel_modern_cpu()) { // Nehalem based cpus
FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
}
#ifdef COMPILER2
@@ -1876,7 +1898,7 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
- ContendedPaddingWidth = cache_line_size;
+ ContendedPaddingWidth = cache_line_size;
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
@@ -1941,6 +1963,18 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
}
+ // CopyAVX3Threshold is the threshold at which 64-byte instructions are used
+ // for implementing the array copy and clear operations.
+ // The Intel platforms that supports the serialize instruction
+ // have improved implementation of 64-byte load/stores and so the default
+ // threshold is set to 0 for these platforms.
+ if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
+ if (is_intel() && is_intel_server_family() && supports_serialize()) {
+ FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
+ } else {
+ FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
+ }
+ }
}
void VM_Version::print_platform_virtualization_info(outputStream* st) {
@@ -2096,17 +2130,6 @@ bool VM_Version::is_intel_darkmont() {
return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
}
-// avx3_threshold() sets the threshold at which 64-byte instructions are used
-// for implementing the array copy and clear operations.
-// The Intel platforms that supports the serialize instruction
-// has improved implementation of 64-byte load/stores and so the default
-// threshold is set to 0 for these platforms.
-int VM_Version::avx3_threshold() {
- return (is_intel_server_family() &&
- supports_serialize() &&
- FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
-}
-
void VM_Version::clear_apx_test_state() {
clear_apx_test_state_stub();
}
@@ -2605,6 +2628,23 @@ const char* VM_Version::cpu_family_description(void) {
return _family_id_intel[cpu_family_id];
}
}
+ if (is_zx()) {
+ int cpu_model_id = extended_cpu_model();
+ if (cpu_family_id == 7) {
+ switch (cpu_model_id) {
+ case 0x1B:
+ return "wudaokou";
+ case 0x3B:
+ return "lujiazui";
+ case 0x5B:
+ return "yongfeng";
+ case 0x6B:
+ return "shijidadao";
+ }
+ } else if (cpu_family_id == 6) {
+ return "zhangjiang";
+ }
+ }
if (is_hygon()) {
return "Dhyana";
}
@@ -2624,6 +2664,9 @@ int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
} else if (is_amd()) {
cpu_type = "AMD";
x64 = cpu_is_em64t() ? " AMD64" : "";
+ } else if (is_zx()) {
+ cpu_type = "Zhaoxin";
+ x64 = cpu_is_em64t() ? " x86_64" : "";
} else if (is_hygon()) {
cpu_type = "Hygon";
x64 = cpu_is_em64t() ? " AMD64" : "";
@@ -3241,9 +3284,15 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
} else {
return 128; // Athlon
}
+ } else if (is_zx()) {
+ if (supports_sse2()) {
+ return 256;
+ } else {
+ return 128;
+ }
} else { // Intel
if (supports_sse3() && is_intel_server_family()) {
- if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+ if (is_intel_modern_cpu()) { // Nehalem based cpus
return 192;
} else if (use_watermark_prefetch) { // watermark prefetching on Core
return 384;
@@ -3279,12 +3328,50 @@ bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
int i = 0;
ss.join([&]() {
- while (i < MAX_CPU_FEATURES) {
- if (_features.supports_feature((VM_Version::Feature_Flag)i)) {
- return _features_names[i++];
+ const char* str = nullptr;
+ while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
+ if (features.supports_feature((VM_Version::Feature_Flag)i)) {
+ str = _features_names[i];
}
i += 1;
}
- return (const char*)nullptr;
+ return str;
}, ", ");
}
+
+void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
+ VM_Features* features = (VM_Features*)features_buffer;
+ insert_features_names(*features, ss);
+}
+
+void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
+ VM_Features* vm_features_set1 = (VM_Features*)features_set1;
+ VM_Features* vm_features_set2 = (VM_Features*)features_set2;
+ int i = 0;
+ ss.join([&]() {
+ const char* str = nullptr;
+ while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
+ Feature_Flag flag = (Feature_Flag)i;
+ if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
+ str = _features_names[i];
+ }
+ i += 1;
+ }
+ return str;
+ }, ", ");
+}
+
+int VM_Version::cpu_features_size() {
+ return sizeof(VM_Features);
+}
+
+void VM_Version::store_cpu_features(void* buf) {
+ VM_Features copy = _features;
+ copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
+ memcpy(buf, ©, sizeof(VM_Features));
+}
+
+bool VM_Version::supports_features(void* features_buffer) {
+ VM_Features* features_to_test = (VM_Features*)features_buffer;
+ return _features.supports_features(features_to_test);
+}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index cc93ee3564e..f721635a02e 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -377,84 +377,84 @@ protected:
*/
enum Feature_Flag {
#define CPU_FEATURE_FLAGS(decl) \
- decl(CX8, "cx8", 0) /* next bits are from cpuid 1 (EDX) */ \
- decl(CMOV, "cmov", 1) \
- decl(FXSR, "fxsr", 2) \
- decl(HT, "ht", 3) \
+ decl(CX8, cx8, 0) /* next bits are from cpuid 1 (EDX) */ \
+ decl(CMOV, cmov, 1) \
+ decl(FXSR, fxsr, 2) \
+ decl(HT, ht, 3) \
\
- decl(MMX, "mmx", 4) \
- decl(3DNOW_PREFETCH, "3dnowpref", 5) /* Processor supports 3dnow prefetch and prefetchw instructions */ \
+ decl(MMX, mmx, 4) \
+ decl(3DNOW_PREFETCH, 3dnowpref, 5) /* Processor supports 3dnow prefetch and prefetchw instructions */ \
/* may not necessarily support other 3dnow instructions */ \
- decl(SSE, "sse", 6) \
- decl(SSE2, "sse2", 7) \
+ decl(SSE, sse, 6) \
+ decl(SSE2, sse2, 7) \
\
- decl(SSE3, "sse3", 8 ) /* SSE3 comes from cpuid 1 (ECX) */ \
- decl(SSSE3, "ssse3", 9 ) \
- decl(SSE4A, "sse4a", 10) \
- decl(SSE4_1, "sse4.1", 11) \
+ decl(SSE3, sse3, 8 ) /* SSE3 comes from cpuid 1 (ECX) */ \
+ decl(SSSE3, ssse3, 9 ) \
+ decl(SSE4A, sse4a, 10) \
+ decl(SSE4_1, sse4.1, 11) \
\
- decl(SSE4_2, "sse4.2", 12) \
- decl(POPCNT, "popcnt", 13) \
- decl(LZCNT, "lzcnt", 14) \
- decl(TSC, "tsc", 15) \
+ decl(SSE4_2, sse4.2, 12) \
+ decl(POPCNT, popcnt, 13) \
+ decl(LZCNT, lzcnt, 14) \
+ decl(TSC, tsc, 15) \
\
- decl(TSCINV_BIT, "tscinvbit", 16) \
- decl(TSCINV, "tscinv", 17) \
- decl(AVX, "avx", 18) \
- decl(AVX2, "avx2", 19) \
+ decl(TSCINV_BIT, tscinvbit, 16) \
+ decl(TSCINV, tscinv, 17) \
+ decl(AVX, avx, 18) \
+ decl(AVX2, avx2, 19) \
\
- decl(AES, "aes", 20) \
- decl(ERMS, "erms", 21) /* enhanced 'rep movsb/stosb' instructions */ \
- decl(CLMUL, "clmul", 22) /* carryless multiply for CRC */ \
- decl(BMI1, "bmi1", 23) \
+ decl(AES, aes, 20) \
+ decl(ERMS, erms, 21) /* enhanced 'rep movsb/stosb' instructions */ \
+ decl(CLMUL, clmul, 22) /* carryless multiply for CRC */ \
+ decl(BMI1, bmi1, 23) \
\
- decl(BMI2, "bmi2", 24) \
- decl(RTM, "rtm", 25) /* Restricted Transactional Memory instructions */ \
- decl(ADX, "adx", 26) \
- decl(AVX512F, "avx512f", 27) /* AVX 512bit foundation instructions */ \
+ decl(BMI2, bmi2, 24) \
+ decl(RTM, rtm, 25) /* Restricted Transactional Memory instructions */ \
+ decl(ADX, adx, 26) \
+ decl(AVX512F, avx512f, 27) /* AVX 512bit foundation instructions */ \
\
- decl(AVX512DQ, "avx512dq", 28) \
- decl(AVX512PF, "avx512pf", 29) \
- decl(AVX512ER, "avx512er", 30) \
- decl(AVX512CD, "avx512cd", 31) \
+ decl(AVX512DQ, avx512dq, 28) \
+ decl(AVX512PF, avx512pf, 29) \
+ decl(AVX512ER, avx512er, 30) \
+ decl(AVX512CD, avx512cd, 31) \
\
- decl(AVX512BW, "avx512bw", 32) /* Byte and word vector instructions */ \
- decl(AVX512VL, "avx512vl", 33) /* EVEX instructions with smaller vector length */ \
- decl(SHA, "sha", 34) /* SHA instructions */ \
- decl(FMA, "fma", 35) /* FMA instructions */ \
+ decl(AVX512BW, avx512bw, 32) /* Byte and word vector instructions */ \
+ decl(AVX512VL, avx512vl, 33) /* EVEX instructions with smaller vector length */ \
+ decl(SHA, sha, 34) /* SHA instructions */ \
+ decl(FMA, fma, 35) /* FMA instructions */ \
\
- decl(VZEROUPPER, "vzeroupper", 36) /* Vzeroupper instruction */ \
- decl(AVX512_VPOPCNTDQ, "avx512_vpopcntdq", 37) /* Vector popcount */ \
- decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
- decl(AVX512_VAES, "avx512_vaes", 39) /* Vector AES instruction */ \
+ decl(VZEROUPPER, vzeroupper, 36) /* Vzeroupper instruction */ \
+ decl(AVX512_VPOPCNTDQ, avx512_vpopcntdq, 37) /* Vector popcount */ \
+ decl(AVX512_VPCLMULQDQ, avx512_vpclmulqdq, 38) /* Vector carryless multiplication */ \
+ decl(AVX512_VAES, avx512_vaes, 39) /* Vector AES instruction */ \
\
- decl(AVX512_VNNI, "avx512_vnni", 40) /* Vector Neural Network Instructions */ \
- decl(FLUSH, "clflush", 41) /* flush instruction */ \
- decl(FLUSHOPT, "clflushopt", 42) /* flusopth instruction */ \
- decl(CLWB, "clwb", 43) /* clwb instruction */ \
+ decl(AVX512_VNNI, avx512_vnni, 40) /* Vector Neural Network Instructions */ \
+ decl(FLUSH, clflush, 41) /* flush instruction */ \
+ decl(FLUSHOPT, clflushopt, 42) /* flusopth instruction */ \
+ decl(CLWB, clwb, 43) /* clwb instruction */ \
\
- decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
- decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
- decl(HV, "hv", 46) /* Hypervisor instructions */ \
- decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */ \
- decl(RDTSCP, "rdtscp", 48) /* RDTSCP instruction */ \
- decl(RDPID, "rdpid", 49) /* RDPID instruction */ \
- decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */ \
- decl(GFNI, "gfni", 51) /* Vector GFNI instructions */ \
- decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */\
- decl(F16C, "f16c", 53) /* Half-precision and single precision FP conversion instructions*/ \
- decl(PKU, "pku", 54) /* Protection keys for user-mode pages */ \
- decl(OSPKE, "ospke", 55) /* OS enables protection keys */ \
- decl(CET_IBT, "cet_ibt", 56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
- decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
- decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
- decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
- decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/ \
- decl(SHA512, "sha512", 61) /* SHA512 instructions*/ \
- decl(AVX512_FP16, "avx512_fp16", 62) /* AVX512 FP16 ISA support*/ \
- decl(AVX10_1, "avx10_1", 63) /* AVX10 512 bit vector ISA Version 1 support*/ \
- decl(AVX10_2, "avx10_2", 64) /* AVX10 512 bit vector ISA Version 2 support*/ \
- decl(HYBRID, "hybrid", 65) /* Hybrid architecture */
+ decl(AVX512_VBMI2, avx512_vbmi2, 44) /* VBMI2 shift left double instructions */ \
+ decl(AVX512_VBMI, avx512_vbmi, 45) /* Vector BMI instructions */ \
+ decl(HV, hv, 46) /* Hypervisor instructions */ \
+ decl(SERIALIZE, serialize, 47) /* CPU SERIALIZE */ \
+ decl(RDTSCP, rdtscp, 48) /* RDTSCP instruction */ \
+ decl(RDPID, rdpid, 49) /* RDPID instruction */ \
+ decl(FSRM, fsrm, 50) /* Fast Short REP MOV */ \
+ decl(GFNI, gfni, 51) /* Vector GFNI instructions */ \
+ decl(AVX512_BITALG, avx512_bitalg, 52) /* Vector sub-word popcount and bit gather instructions */\
+ decl(F16C, f16c, 53) /* Half-precision and single precision FP conversion instructions*/ \
+ decl(PKU, pku, 54) /* Protection keys for user-mode pages */ \
+ decl(OSPKE, ospke, 55) /* OS enables protection keys */ \
+ decl(CET_IBT, cet_ibt, 56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
+ decl(CET_SS, cet_ss, 57) /* Control Flow Enforcement - Shadow Stack */ \
+ decl(AVX512_IFMA, avx512_ifma, 58) /* Integer Vector FMA instructions*/ \
+ decl(AVX_IFMA, avx_ifma, 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
+ decl(APX_F, apx_f, 60) /* Intel Advanced Performance Extensions*/ \
+ decl(SHA512, sha512, 61) /* SHA512 instructions*/ \
+ decl(AVX512_FP16, avx512_fp16, 62) /* AVX512 FP16 ISA support*/ \
+ decl(AVX10_1, avx10_1, 63) /* AVX10 512 bit vector ISA Version 1 support*/ \
+ decl(AVX10_2, avx10_2, 64) /* AVX10 512 bit vector ISA Version 2 support*/ \
+ decl(HYBRID, hybrid, 65) /* Hybrid architecture */
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@@ -516,6 +516,15 @@ protected:
int idx = index(feature);
return (_features_bitmap[idx] & bit_mask(feature)) != 0;
}
+
+ bool supports_features(VM_Features* features_to_test) {
+ for (int i = 0; i < features_bitmap_element_count(); i++) {
+ if ((_features_bitmap[i] & features_to_test->_features_bitmap[i]) != features_to_test->_features_bitmap[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
};
// CPU feature flags vector, can be affected by VM settings.
@@ -526,6 +535,10 @@ protected:
static const char* _features_names[];
+ static void clear_feature(Feature_Flag feature) {
+ _features.clear_feature(feature);
+ }
+
static void clear_cpu_features() {
_features = VM_Features();
_cpu_features = VM_Features();
@@ -676,6 +689,10 @@ protected:
// Space to save apx registers after signal handle
jlong apx_save[2]; // Save r16 and r31
+ // cpuid function 0xD, subleaf 19 (APX extended state)
+ uint32_t apx_xstate_size; // EAX: size of APX state (128)
+ uint32_t apx_xstate_offset; // EBX: offset in standard XSAVE area
+
VM_Features feature_flags() const;
// Asserts
@@ -739,6 +756,11 @@ public:
static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
static ByteSize apx_save_offset() { return byte_offset_of(CpuidInfo, apx_save); }
+ static ByteSize apx_xstate_offset_offset() { return byte_offset_of(CpuidInfo, apx_xstate_offset); }
+ static ByteSize apx_xstate_size_offset() { return byte_offset_of(CpuidInfo, apx_xstate_size); }
+
+ static uint32_t apx_xstate_offset() { return _cpuid_info.apx_xstate_offset; }
+ static uint32_t apx_xstate_size() { return _cpuid_info.apx_xstate_size; }
// The value used to check ymm register after signal handle
static int ymm_test_value() { return 0xCAFEBABE; }
@@ -810,7 +832,7 @@ public:
static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); }
static int cpu_family() { return _cpu;}
static bool is_P6() { return cpu_family() >= 6; }
- static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; }
+ static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 18 || cpu_family() == 19; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
static bool is_amd_family() { return is_amd() || is_hygon(); }
@@ -912,6 +934,7 @@ public:
// Feature identification not affected by VM flags
//
static bool cpu_supports_evex() { return _cpu_features.supports_feature(CPU_AVX512F); }
+ static bool cpu_supports_aes() { return _cpu_features.supports_feature(CPU_AES); }
static bool supports_avx512_simd_sort() {
if (supports_avx512dq()) {
@@ -940,7 +963,11 @@ public:
static bool is_intel_darkmont();
- static int avx3_threshold();
+ static bool is_intel_modern_cpu() {
+ precond(is_intel()); // should be called only for intel CPU
+ // Efficient cores in hybrid CPU may not support hyper-threads.
+ return (supports_avx() || (supports_sse4_2() && (supports_ht() || supports_hybrid())));
+ }
static bool is_intel_tsc_synched_at_init();
@@ -1094,6 +1121,20 @@ public:
static bool supports_tscinv_ext(void);
static void initialize_cpu_information(void);
+
+ static void get_cpu_features_name(void* features_buffer, stringStream& ss);
+
+ // Returns names of features present in features_set1 but not in features_set2
+ static void get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss);
+
+ // Returns number of bytes required to store cpu features representation
+ static int cpu_features_size();
+
+ // Stores cpu features representation in the provided buffer. This representation is arch dependent.
+ // Size of the buffer must be same as returned by cpu_features_size()
+ static void store_cpu_features(void* buf);
+
+ static bool supports_features(void* features_to_test);
};
#endif // CPU_X86_VM_VERSION_X86_HPP
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 93b306c37d6..eaa88d900c7 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1699,92 +1699,108 @@ static void emit_cmpfp_fixup(MacroAssembler* masm) {
}
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
+ // If any floating point comparison instruction is used, unordered case always triggers jump
+ // for below condition, CF=1 is true when at least one input is NaN
Label done;
__ movl(dst, -1);
- __ jcc(Assembler::parity, done);
__ jcc(Assembler::below, done);
__ setcc(Assembler::notEqual, dst);
__ bind(done);
}
-// Math.min() # Math.max()
-// --------------------------
-// ucomis[s/d] #
-// ja -> b # a
-// jp -> NaN # NaN
-// jb -> a # b
-// je #
-// |-jz -> a | b # a & b
-// | -> a #
+enum FP_PREC {
+ fp_prec_hlf,
+ fp_prec_flt,
+ fp_prec_dbl
+};
+
+static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
+ XMMRegister p, XMMRegister q) {
+ if (pt == fp_prec_hlf) {
+ __ evucomish(p, q);
+ } else if (pt == fp_prec_flt) {
+ __ ucomiss(p, q);
+ } else {
+ __ ucomisd(p, q);
+ }
+}
+
+static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
+ XMMRegister dst, XMMRegister src, Register scratch) {
+ if (pt == fp_prec_hlf) {
+ __ movhlf(dst, src, scratch);
+ } else if (pt == fp_prec_flt) {
+ __ movflt(dst, src);
+ } else {
+ __ movdbl(dst, src);
+ }
+}
+
+// Math.min() # Math.max()
+// -----------------------------
+// (v)ucomis[h/s/d] #
+// ja -> b # a
+// jp -> NaN # NaN
+// jb -> a # b
+// je #
+// |-jz -> a | b # a & b
+// | -> a #
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
XMMRegister a, XMMRegister b,
XMMRegister xmmt, Register rt,
- bool min, bool single) {
+ bool min, enum FP_PREC pt) {
Label nan, zero, below, above, done;
- if (single)
- __ ucomiss(a, b);
- else
- __ ucomisd(a, b);
+ emit_fp_ucom(masm, pt, a, b);
- if (dst->encoding() != (min ? b : a)->encoding())
+ if (dst->encoding() != (min ? b : a)->encoding()) {
__ jccb(Assembler::above, above); // CF=0 & ZF=0
- else
+ } else {
__ jccb(Assembler::above, done);
+ }
__ jccb(Assembler::parity, nan); // PF=1
__ jccb(Assembler::below, below); // CF=1
// equal
__ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
- if (single) {
- __ ucomiss(a, xmmt);
- __ jccb(Assembler::equal, zero);
+ emit_fp_ucom(masm, pt, a, xmmt);
- __ movflt(dst, a);
- __ jmp(done);
- }
- else {
- __ ucomisd(a, xmmt);
- __ jccb(Assembler::equal, zero);
+ __ jccb(Assembler::equal, zero);
+ movfp(masm, pt, dst, a, rt);
- __ movdbl(dst, a);
- __ jmp(done);
- }
+ __ jmp(done);
__ bind(zero);
- if (min)
+ if (min) {
__ vpor(dst, a, b, Assembler::AVX_128bit);
- else
+ } else {
__ vpand(dst, a, b, Assembler::AVX_128bit);
+ }
__ jmp(done);
__ bind(above);
- if (single)
- __ movflt(dst, min ? b : a);
- else
- __ movdbl(dst, min ? b : a);
+ movfp(masm, pt, dst, min ? b : a, rt);
__ jmp(done);
__ bind(nan);
- if (single) {
+ if (pt == fp_prec_hlf) {
+ __ movl(rt, 0x00007e00); // Float16.NaN
+ __ evmovw(dst, rt);
+ } else if (pt == fp_prec_flt) {
__ movl(rt, 0x7fc00000); // Float.NaN
__ movdl(dst, rt);
- }
- else {
+ } else {
__ mov64(rt, 0x7ff8000000000000L); // Double.NaN
__ movdq(dst, rt);
}
__ jmp(done);
__ bind(below);
- if (single)
- __ movflt(dst, min ? a : b);
- else
- __ movdbl(dst, min ? a : b);
+ movfp(masm, pt, dst, min ? a : b, rt);
__ bind(done);
}
@@ -2604,13 +2620,8 @@ uint BoxLockNode::size(PhaseRegAlloc *ra_) const
#ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
- if (UseCompressedClassPointers) {
- st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
- } else {
- st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
- st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
- }
+ st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+ st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
}
#endif
@@ -2725,11 +2736,8 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
return (-128 <= offset && offset <= 127);
}
+#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
-// This function is used on startup to build the trampoline stubs in
-// generateOptoStub. Registers not mentioned will be killed by the VM
-// call in the trampoline, and arguments in those registers not be
-// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
return
@@ -2749,11 +2757,7 @@ bool Matcher::can_be_java_arg(int reg)
reg == XMM6_num || reg == XMM6b_num ||
reg == XMM7_num || reg == XMM7b_num;
}
-
-bool Matcher::is_spillable_arg(int reg)
-{
- return can_be_java_arg(reg);
-}
+#endif
uint Matcher::int_pressure_limit()
{
@@ -2769,13 +2773,6 @@ uint Matcher::float_pressure_limit()
return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
}
-bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
- // In 64 bit mode a code which use multiply when
- // devisor is constant is faster than hardware
- // DIV instruction (it uses MulHiL).
- return false;
-}
-
// Register for DIVI projection of divmodI
const RegMask& Matcher::divI_proj_mask() {
return INT_RAX_REG_mask();
@@ -3340,6 +3337,18 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
return false;
}
break;
+ case Op_UMinReductionV:
+ case Op_UMaxReductionV:
+ if (UseAVX == 0) {
+ return false;
+ }
+ if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
+ return false;
+ }
+ if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
+ return false;
+ }
+ break;
case Op_MaxV:
case Op_MinV:
if (UseSSE < 4 && is_integral_type(bt)) {
@@ -4678,11 +4687,6 @@ frame
// Compiled code's Frame Pointer
frame_pointer(RSP);
- // Interpreter stores its frame pointer in a register which is
- // stored to the stack by I2CAdaptors.
- // I2CAdaptors convert from interpreted java to compiled java.
- interpreter_frame_pointer(RBP);
-
// Stack alignment requirement
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
@@ -5186,6 +5190,18 @@ operand immL_65535()
interface(CONST_INTER);
%}
+// AOT Runtime Constants Address
+operand immAOTRuntimeConstantsAddress()
+%{
+ // Check if the address is in the range of AOT Runtime Constants
+ predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
+ match(ConP);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
operand kReg()
%{
constraint(ALLOC_IN_RC(vectmask_reg));
@@ -5529,12 +5545,21 @@ operand rFlagsRegU()
operand rFlagsRegUCF() %{
constraint(ALLOC_IN_RC(int_flags));
match(RegFlags);
- predicate(false);
+ predicate(!UseAPX || !VM_Version::supports_avx10_2());
format %{ "RFLAGS_U_CF" %}
interface(REG_INTER);
%}
+operand rFlagsRegUCFE() %{
+ constraint(ALLOC_IN_RC(int_flags));
+ match(RegFlags);
+ predicate(UseAPX && VM_Version::supports_avx10_2());
+
+ format %{ "RFLAGS_U_CFE" %}
+ interface(REG_INTER);
+%}
+
// Float register operands
operand regF() %{
constraint(ALLOC_IN_RC(float_reg));
@@ -6027,10 +6052,10 @@ operand cmpOp()
interface(COND_INTER) %{
equal(0x4, "e");
not_equal(0x5, "ne");
- less(0xC, "l");
- greater_equal(0xD, "ge");
- less_equal(0xE, "le");
- greater(0xF, "g");
+ less(0xc, "l");
+ greater_equal(0xd, "ge");
+ less_equal(0xe, "le");
+ greater(0xf, "g");
overflow(0x0, "o");
no_overflow(0x1, "no");
%}
@@ -6062,11 +6087,12 @@ operand cmpOpU()
// don't need to use cmpOpUCF2 for eq/ne
operand cmpOpUCF() %{
match(Bool);
- predicate(n->as_Bool()->_test._test == BoolTest::lt ||
- n->as_Bool()->_test._test == BoolTest::ge ||
- n->as_Bool()->_test._test == BoolTest::le ||
- n->as_Bool()->_test._test == BoolTest::gt ||
- n->in(1)->in(1) == n->in(1)->in(2));
+ predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
+ (n->as_Bool()->_test._test == BoolTest::lt ||
+ n->as_Bool()->_test._test == BoolTest::ge ||
+ n->as_Bool()->_test._test == BoolTest::le ||
+ n->as_Bool()->_test._test == BoolTest::gt ||
+ n->in(1)->in(1) == n->in(1)->in(2)));
format %{ "" %}
interface(COND_INTER) %{
equal(0xb, "np");
@@ -6084,7 +6110,8 @@ operand cmpOpUCF() %{
// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
match(Bool);
- predicate((n->as_Bool()->_test._test == BoolTest::ne ||
+ predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
+ (n->as_Bool()->_test._test == BoolTest::ne ||
n->as_Bool()->_test._test == BoolTest::eq) &&
n->in(1)->in(1) != n->in(1)->in(2));
format %{ "" %}
@@ -6100,6 +6127,37 @@ operand cmpOpUCF2() %{
%}
%}
+
+// Floating point comparisons that set condition flags to test more directly,
+// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
+// are used for L (<) and LE (<=) conditions. It's important to convert these
+// latter conditions to ones that use unsigned tests before passing into an
+// instruction because the preceding comparison might be based on a three way
+// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
+operand cmpOpUCFE()
+%{
+ match(Bool);
+ predicate((UseAPX && VM_Version::supports_avx10_2()) &&
+ (n->as_Bool()->_test._test == BoolTest::ne ||
+ n->as_Bool()->_test._test == BoolTest::eq ||
+ n->as_Bool()->_test._test == BoolTest::lt ||
+ n->as_Bool()->_test._test == BoolTest::ge ||
+ n->as_Bool()->_test._test == BoolTest::le ||
+ n->as_Bool()->_test._test == BoolTest::gt));
+
+ format %{ "" %}
+ interface(COND_INTER) %{
+ equal(0x4, "e");
+ not_equal(0x5, "ne");
+ less(0x2, "b");
+ greater_equal(0x3, "ae");
+ less_equal(0x6, "be");
+ greater(0x7, "a");
+ overflow(0x0, "o");
+ no_overflow(0x1, "no");
+ %}
+%}
+
// Operands for bound floating pointer register arguments
operand rxmm0() %{
constraint(ALLOC_IN_RC(xmm0_reg));
@@ -7289,146 +7347,153 @@ instruct loadD(regD dst, memory mem)
ins_pipe(pipe_slow); // XXX
%}
+instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
+%{
+ match(Set dst con);
+
+ format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
+
+ ins_encode %{
+ __ load_aotrc_address($dst$$Register, (address)$con$$constant);
+ %}
+
+ ins_pipe(ialu_reg_fat);
+%}
+
+// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
-instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
- predicate(VM_Version::supports_avx10_2());
+instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
+%{
+ predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
- format %{ "maxF $dst, $a, $b" %}
- ins_encode %{
- __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// max = java.lang.Math.max(float a, float b)
-instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
- match(Set dst (MaxF a b));
- effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
- format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
- ins_encode %{
- __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
- match(Set dst (MaxF a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
-
- format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
- ins_encode %{
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
- false /*min*/, true /*single*/);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// max = java.lang.Math.max(double a, double b)
-instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
- predicate(VM_Version::supports_avx10_2());
- match(Set dst (MaxD a b));
- format %{ "maxD $dst, $a, $b" %}
- ins_encode %{
- __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// max = java.lang.Math.max(double a, double b)
-instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
- match(Set dst (MaxD a b));
- effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
- format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
- ins_encode %{
- __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
- match(Set dst (MaxD a b));
- effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
-
- format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
- ins_encode %{
- emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
- false /*min*/, false /*single*/);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// max = java.lang.Math.min(float a, float b)
-instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
- predicate(VM_Version::supports_avx10_2());
match(Set dst (MinF a b));
- format %{ "minF $dst, $a, $b" %}
+
+ format %{ "minmaxF $dst, $a, $b" %}
ins_encode %{
- __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
+ int opcode = this->ideal_Opcode();
+ __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
+%{
+ predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
+ match(Set dst (MaxF a b));
+ match(Set dst (MinF a b));
+ effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+
+ format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ bool min = (opcode == Op_MinF) ? true : false;
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
%}
// min = java.lang.Math.min(float a, float b)
-instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
+// max = java.lang.Math.max(float a, float b)
+instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
+%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
+ match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
- format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
+
+ format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
ins_encode %{
- __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
+ int opcode = this->ideal_Opcode();
+ int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
+ __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
-instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
+instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
+%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
+ match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
- format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
ins_encode %{
+ int opcode = this->ideal_Opcode();
+ bool min = (opcode == Op_MinF) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
- true /*min*/, true /*single*/);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// max = java.lang.Math.min(double a, double b)
-instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
- predicate(VM_Version::supports_avx10_2());
- match(Set dst (MinD a b));
- format %{ "minD $dst, $a, $b" %}
- ins_encode %{
- __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
+ min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
%}
// min = java.lang.Math.min(double a, double b)
-instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
+// max = java.lang.Math.max(double a, double b)
+instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
+%{
+ predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
+ match(Set dst (MaxD a b));
match(Set dst (MinD a b));
- effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
- format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
+
+ format %{ "minmaxD $dst, $a, $b" %}
ins_encode %{
- __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
+ int opcode = this->ideal_Opcode();
+ __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
-instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
- predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
+instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
+%{
+ predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
+ match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
- format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
ins_encode %{
+ int opcode = this->ideal_Opcode();
+ bool min = (opcode == Op_MinD) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
- true /*min*/, false /*single*/);
+ min, fp_prec_dbl /*pt*/);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// min = java.lang.Math.min(double a, double b)
+// max = java.lang.Math.max(double a, double b)
+instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
+%{
+ predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
+ match(Set dst (MaxD a b));
+ match(Set dst (MinD a b));
+ effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
+
+ format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
+ __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
+ $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
+%{
+ predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
+ match(Set dst (MaxD a b));
+ match(Set dst (MinD a b));
+ effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
+
+ format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ bool min = (opcode == Op_MinD) ? true : false;
+ emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
+ min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
%}
@@ -8784,6 +8849,21 @@ instruct membar_release_lock()
ins_pipe(empty);
%}
+instruct membar_storeload(rFlagsReg cr) %{
+ match(MemBarStoreLoad);
+ effect(KILL cr);
+ ins_cost(400);
+
+ format %{
+ $$template
+ $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
+ %}
+ ins_encode %{
+ __ membar(Assembler::StoreLoad);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct membar_volatile(rFlagsReg cr) %{
match(MemBarVolatile);
effect(KILL cr);
@@ -8811,6 +8891,21 @@ instruct unnecessary_membar_volatile()
ins_pipe(empty);
%}
+instruct membar_full(rFlagsReg cr) %{
+ match(MemBarFull);
+ effect(KILL cr);
+ ins_cost(400);
+
+ format %{
+ $$template
+ $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
+ %}
+ ins_encode %{
+ __ membar(Assembler::StoreLoad);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct membar_storestore() %{
match(MemBarStoreStore);
match(StoreStoreFence);
@@ -9116,20 +9211,34 @@ instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
ins_pipe(ialu_reg);
%}
+instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
+%{
+ predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
+ match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
+
+ ins_cost(100); // XXX
+ format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
+ ins_encode %{
+ Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
+ __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
- predicate(!UseAPX);
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovI_regU(cop, cr, dst, src);
%}
%}
-instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
- predicate(UseAPX);
+instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
+
ins_cost(200);
- format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
+ format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
ins_encode %{
__ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
%}
@@ -9137,7 +9246,7 @@ instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1,
%}
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200); // XXX
@@ -9150,25 +9259,10 @@ instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
- match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpl $dst, $src1, $src2\n\t"
- "cmovnel $dst, $src2" %}
- ins_encode %{
- __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
effect(TEMP dst);
@@ -9182,23 +9276,6 @@ instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
ins_pipe(pipe_cmov_reg);
%}
-// We need this special handling for only eq / neq comparison since NaN == NaN is false,
-// and parity flag bit is set if any of the operand is a NaN.
-instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
- match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpl $dst, $src1, $src2\n\t"
- "cmovnel $dst, $src2" %}
- ins_encode %{
- __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
// Conditional move
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
predicate(!UseAPX);
@@ -9241,8 +9318,8 @@ instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%}
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
- predicate(!UseAPX);
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
+
ins_cost(250);
expand %{
cmovI_memU(cop, cr, dst, src);
@@ -9262,12 +9339,12 @@ instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI sr
ins_pipe(pipe_cmov_mem);
%}
-instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
+instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
%{
- predicate(UseAPX);
match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
+
ins_cost(250);
- format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
+ format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
ins_encode %{
__ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
%}
@@ -9317,8 +9394,8 @@ instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%}
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
- predicate(!UseAPX);
match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovN_regU(cop, cr, dst, src);
@@ -9339,11 +9416,11 @@ instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
- predicate(UseAPX);
+instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
+
ins_cost(200);
- format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
+ format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
ins_encode %{
__ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
%}
@@ -9437,19 +9514,19 @@ instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP
%}
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
- predicate(!UseAPX);
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovP_regU(cop, cr, dst, src);
%}
%}
-instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
- predicate(UseAPX);
+instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
+
ins_cost(200);
- format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
+ format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
ins_encode %{
__ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
%}
@@ -9457,7 +9534,7 @@ instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1,
%}
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
ins_cost(200); // XXX
@@ -9470,25 +9547,10 @@ instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
- match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpq $dst, $src1, $src2\n\t"
- "cmovneq $dst, $src2" %}
- ins_encode %{
- __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
ins_cost(200); // XXX
@@ -9501,21 +9563,6 @@ instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
- match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpq $dst, $src1, $src2\n\t"
- "cmovneq $dst, $src2" %}
- ins_encode %{
- __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
@@ -9636,21 +9683,35 @@ instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
ins_pipe(ialu_reg);
%}
+instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
+%{
+ predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
+ match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
+
+ ins_cost(100); // XXX
+ format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
+ ins_encode %{
+ Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
+ __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
- predicate(!UseAPX);
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovL_regU(cop, cr, dst, src);
%}
%}
-instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
+instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
- predicate(UseAPX);
match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
+
ins_cost(200);
- format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
+ format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
ins_encode %{
__ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
%}
@@ -9658,7 +9719,7 @@ instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1,
%}
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
ins_cost(200); // XXX
@@ -9671,25 +9732,10 @@ instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
- match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpq $dst, $src1, $src2\n\t"
- "cmovneq $dst, $src2" %}
- ins_encode %{
- __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
- predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
+ predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
ins_cost(200); // XXX
@@ -9702,21 +9748,6 @@ instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
ins_pipe(pipe_cmov_reg);
%}
-instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
- predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
- match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
- effect(TEMP dst);
-
- ins_cost(200);
- format %{ "ecmovpq $dst, $src1, $src2\n\t"
- "cmovneq $dst, $src2" %}
- ins_encode %{
- __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
- __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
- %}
- ins_pipe(pipe_cmov_reg);
-%}
-
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
predicate(!UseAPX);
@@ -9731,8 +9762,8 @@ instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%}
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
- predicate(!UseAPX);
match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
+
ins_cost(200);
expand %{
cmovL_memU(cop, cr, dst, src);
@@ -9752,12 +9783,12 @@ instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL sr
ins_pipe(pipe_cmov_mem);
%}
-instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
+instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
- predicate(UseAPX);
match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
+
ins_cost(200);
- format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
+ format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
ins_encode %{
__ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
%}
@@ -9802,12 +9833,31 @@ instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovF_regU(cop, cr, dst, src);
%}
%}
+instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
+%{
+ match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+
+ ins_cost(200); // XXX
+ format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
+ "movss $dst, $src\n"
+ "skip:" %}
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
@@ -9846,12 +9896,31 @@ instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+
ins_cost(200);
expand %{
cmovD_regU(cop, cr, dst, src);
%}
%}
+instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
+%{
+ match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+
+ ins_cost(200); // XXX
+ format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
+ "movsd $dst, $src\n"
+ "skip:" %}
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
@@ -14319,7 +14388,7 @@ instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
ins_pipe(pipe_slow);
%}
-instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
+instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
match(Set cr (CmpF src1 src2));
ins_cost(100);
@@ -14330,6 +14399,17 @@ instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
ins_pipe(pipe_slow);
%}
+instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
+ match(Set cr (CmpF src1 src2));
+
+ ins_cost(100);
+ format %{ "evucomxss $src1, $src2" %}
+ ins_encode %{
+ __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
match(Set cr (CmpF src1 (LoadF src2)));
@@ -14341,8 +14421,20 @@ instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
ins_pipe(pipe_slow);
%}
+instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
+ match(Set cr (CmpF src1 (LoadF src2)));
+
+ ins_cost(100);
+ format %{ "evucomxss $src1, $src2" %}
+ ins_encode %{
+ __ evucomxss($src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
match(Set cr (CmpF src con));
+
ins_cost(100);
format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
ins_encode %{
@@ -14351,6 +14443,17 @@ instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
ins_pipe(pipe_slow);
%}
+instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
+ match(Set cr (CmpF src con));
+
+ ins_cost(100);
+ format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
+ ins_encode %{
+ __ evucomxss($src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// Really expensive, avoid
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
@@ -14370,7 +14473,7 @@ instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
ins_pipe(pipe_slow);
%}
-instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
+instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
match(Set cr (CmpD src1 src2));
ins_cost(100);
@@ -14381,6 +14484,17 @@ instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
ins_pipe(pipe_slow);
%}
+instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
+ match(Set cr (CmpD src1 src2));
+
+ ins_cost(100);
+ format %{ "evucomxsd $src1, $src2 test" %}
+ ins_encode %{
+ __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
match(Set cr (CmpD src1 (LoadD src2)));
@@ -14392,6 +14506,17 @@ instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
ins_pipe(pipe_slow);
%}
+instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
+ match(Set cr (CmpD src1 (LoadD src2)));
+
+ ins_cost(100);
+ format %{ "evucomxsd $src1, $src2" %}
+ ins_encode %{
+ __ evucomxsd($src1$$XMMRegister, $src2$$Address);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
match(Set cr (CmpD src con));
ins_cost(100);
@@ -14402,6 +14527,17 @@ instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
ins_pipe(pipe_slow);
%}
+instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
+ match(Set cr (CmpD src con));
+
+ ins_cost(100);
+ format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
+ ins_encode %{
+ __ evucomxsd($src$$XMMRegister, $constantaddress($con));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
// Compare into -1,0,1
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
@@ -16808,6 +16944,21 @@ instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
ins_pipe(pipe_jcc);
%}
+// Jump Direct Conditional - using signed and unsigned comparison
+instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(200);
+ format %{ "j$cop,su $labl" %}
+ size(6);
+ ins_encode %{
+ Label* L = $labl$$label;
+ __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+ %}
+ ins_pipe(pipe_jcc);
+%}
+
// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
// superklass array for an instance of the superklass. Set a hidden
@@ -17026,6 +17177,22 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
ins_short_branch(1);
%}
+// Jump Direct Conditional - using signed and unsigned comparison
+instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
+ match(If cop cmp);
+ effect(USE labl);
+
+ ins_cost(300);
+ format %{ "j$cop,sus $labl" %}
+ size(2);
+ ins_encode %{
+ Label* L = $labl$$label;
+ __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+ %}
+ ins_pipe(pipe_jcc);
+ ins_short_branch(1);
+%}
+
// ============================================================================
// inlined locking and unlocking
@@ -18674,7 +18841,7 @@ instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
- __ vmovw($rtmp$$Register, $src$$XMMRegister);
+ __ evmovw($rtmp$$Register, $src$$XMMRegister);
__ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
@@ -19231,6 +19398,8 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -19252,6 +19421,8 @@ instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -19271,6 +19442,8 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -19499,6 +19672,8 @@ instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -19517,6 +19692,8 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -19538,6 +19715,8 @@ instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
match(Set dst (XorReductionV src1 src2));
match(Set dst (MinReductionV src1 src2));
match(Set dst (MaxReductionV src1 src2));
+ match(Set dst (UMinReductionV src1 src2));
+ match(Set dst (UMaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -20777,7 +20956,7 @@ instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
int vlen_enc = vector_length_encoding(this);
int opcode = this->ideal_Opcode();
BasicType elem_bt = Matcher::vector_element_basic_type(this);
- __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
+ __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -23793,8 +23972,12 @@ instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
format %{ "vector_mask_gen $len \t! vector mask generator" %}
effect(TEMP temp);
ins_encode %{
- __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
- __ kmovql($dst$$KRegister, $temp$$Register);
+ if ($len$$constant > 0) {
+ __ mov64($temp$$Register, right_n_bits($len$$constant));
+ __ kmovql($dst$$KRegister, $temp$$Register);
+ } else {
+ __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
+ }
%}
ins_pipe( pipe_slow );
%}
@@ -25121,9 +25304,9 @@ instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
instruct reinterpretS2HF(regF dst, rRegI src)
%{
match(Set dst (ReinterpretS2HF src));
- format %{ "vmovw $dst, $src" %}
+ format %{ "evmovw $dst, $src" %}
ins_encode %{
- __ vmovw($dst$$XMMRegister, $src$$Register);
+ __ evmovw($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow);
%}
@@ -25131,9 +25314,9 @@ instruct reinterpretS2HF(regF dst, rRegI src)
instruct reinterpretHF2S(rRegI dst, regF src)
%{
match(Set dst (ReinterpretHF2S src));
- format %{ "vmovw $dst, $src" %}
+ format %{ "evmovw $dst, $src" %}
ins_encode %{
- __ vmovw($dst$$Register, $src$$XMMRegister);
+ __ evmovw($dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@@ -25187,10 +25370,11 @@ instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
predicate(VM_Version::supports_avx10_2());
match(Set dst (MaxHF src1 src2));
match(Set dst (MinHF src1 src2));
+
format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
ins_encode %{
- int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
- __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
+ int opcode = this->ideal_Opcode();
+ __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
%}
ins_pipe( pipe_slow );
%}
@@ -25201,11 +25385,12 @@ instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xt
match(Set dst (MaxHF src1 src2));
match(Set dst (MinHF src1 src2));
effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
+
format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
- __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
- $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
+ __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
+ $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -25305,8 +25490,9 @@ instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
- int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
- __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
+ int opcode = this->ideal_Opcode();
+ __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
+ k0, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -25319,8 +25505,9 @@ instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
- int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
- __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
+ int opcode = this->ideal_Opcode();
+ __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
+ k0, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -25335,8 +25522,8 @@ instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1,
ins_encode %{
int vlen_enc = vector_length_encoding(this);
int opcode = this->ideal_Opcode();
- __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
- $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
+ __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
+ $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
diff --git a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.inline.hpp b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.inline.hpp
index 4d813cd53c6..4c73368b673 100644
--- a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.inline.hpp
@@ -26,6 +26,8 @@
#ifndef CPU_ZERO_BYTECODEINTERPRETER_ZERO_INLINE_HPP
#define CPU_ZERO_BYTECODEINTERPRETER_ZERO_INLINE_HPP
+#include "sanitizers/ub.hpp"
+
// Inline interpreter functions for zero
inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) {
@@ -40,6 +42,7 @@ inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) {
return op1 * op2;
}
+ATTRIBUTE_NO_UBSAN // IEEE-754 division by zero is well-defined
inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) {
return op1 / op2;
}
@@ -68,7 +71,7 @@ inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2],
}
inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
- return op1 + op2;
+ return java_add(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
@@ -82,7 +85,7 @@ inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
}
inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
- return op1 * op2;
+ return java_multiply(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
@@ -90,7 +93,7 @@ inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
}
inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
- return op1 - op2;
+ return java_subtract(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
@@ -104,19 +107,19 @@ inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
}
inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
- return ((unsigned long long) op1) >> (op2 & 0x3F);
+ return java_shift_right_unsigned(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
- return op1 >> (op2 & 0x3F);
+ return java_shift_right(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
- return op1 << (op2 & 0x3F);
+ return java_shift_left(op1, op2);
}
inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
- return -op;
+ return java_negate(op);
}
inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
@@ -183,8 +186,8 @@ inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
return op1 + op2;
}
+ATTRIBUTE_NO_UBSAN // IEEE-754 division by zero is well-defined
inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
- // Divide by zero... QQQ
return op1 / op2;
}
@@ -228,7 +231,7 @@ inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
// Integer Arithmetic
inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
- return op1 + op2;
+ return java_add(op1, op2);
}
inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
@@ -242,11 +245,11 @@ inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
}
inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
- return op1 * op2;
+ return java_multiply(op1, op2);
}
inline jint BytecodeInterpreter::VMintNeg(jint op) {
- return -op;
+ return java_negate(op);
}
inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
@@ -260,19 +263,19 @@ inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
}
inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
- return op1 << (op2 & 0x1F);
+ return java_shift_left(op1, op2);
}
inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
- return op1 >> (op2 & 0x1F);
+ return java_shift_right(op1, op2);
}
inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
- return op1 - op2;
+ return java_subtract(op1, op2);
}
inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
- return ((juint) op1) >> (op2 & 0x1F);
+ return java_shift_right_unsigned(op1, op2);
}
inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
diff --git a/src/hotspot/cpu/zero/globals_zero.hpp b/src/hotspot/cpu/zero/globals_zero.hpp
index 6b6c6ea983c..6dc7d81275c 100644
--- a/src/hotspot/cpu/zero/globals_zero.hpp
+++ b/src/hotspot/cpu/zero/globals_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -39,7 +39,7 @@ define_pd_global(bool, UncommonNullCast, true);
define_pd_global(bool, DelayCompilerStubsGeneration, false); // Don't have compiler's stubs
define_pd_global(size_t, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment.
-define_pd_global(intx, CodeEntryAlignment, 32);
+define_pd_global(uint, CodeEntryAlignment, 32);
define_pd_global(intx, OptoLoopAlignment, 16);
define_pd_global(intx, InlineSmallCode, 1000);
diff --git a/src/hotspot/cpu/zero/register_zero.hpp b/src/hotspot/cpu/zero/register_zero.hpp
index fd30f206762..846b649eebd 100644
--- a/src/hotspot/cpu/zero/register_zero.hpp
+++ b/src/hotspot/cpu/zero/register_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -48,7 +48,6 @@ class RegisterImpl : public AbstractRegisterImpl {
};
// construction
- inline friend Register as_Register(int encoding);
VMReg as_VMReg();
// derived registers, offsets, and addresses
@@ -113,7 +112,6 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
static const int max_fpr;
};
-CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
-#define noreg ((Register)(noreg_RegisterEnumValue))
+const Register noreg = as_Register(-1);
#endif // CPU_ZERO_REGISTER_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/stubDeclarations_zero.hpp b/src/hotspot/cpu/zero/stubDeclarations_zero.hpp
index 2357bbb5169..9abe313b3a7 100644
--- a/src/hotspot/cpu/zero/stubDeclarations_zero.hpp
+++ b/src/hotspot/cpu/zero/stubDeclarations_zero.hpp
@@ -29,35 +29,40 @@
#define STUBGEN_PREUNIVERSE_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(preuniverse, 0) \
#define STUBGEN_INITIAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(initial, 0) \
#define STUBGEN_CONTINUATION_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(continuation, 0) \
#define STUBGEN_COMPILER_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(compiler, 0) \
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
do_arch_blob, \
do_arch_entry, \
- do_arch_entry_init) \
+ do_arch_entry_init, \
+ do_arch_entry_array) \
do_arch_blob(final, 0) \
diff --git a/src/hotspot/cpu/zero/stubGenerator_zero.cpp b/src/hotspot/cpu/zero/stubGenerator_zero.cpp
index 08cb173b507..569a2fa8ca9 100644
--- a/src/hotspot/cpu/zero/stubGenerator_zero.cpp
+++ b/src/hotspot/cpu/zero/stubGenerator_zero.cpp
@@ -213,7 +213,7 @@ class StubGenerator: public StubCodeGenerator {
}
public:
- StubGenerator(CodeBuffer* code, BlobId blob_id) : StubCodeGenerator(code, blob_id) {
+ StubGenerator(CodeBuffer* code, BlobId blob_id, AOTStubData *stub_data) : StubCodeGenerator(code, blob_id, stub_data) {
switch(blob_id) {
case BlobId::stubgen_preuniverse_id:
generate_preuniverse_stubs();
@@ -237,8 +237,8 @@ class StubGenerator: public StubCodeGenerator {
}
};
-void StubGenerator_generate(CodeBuffer* code, BlobId blob_id) {
- StubGenerator g(code, blob_id);
+void StubGenerator_generate(CodeBuffer* code, BlobId blob_id, AOTStubData *stub_data) {
+ StubGenerator g(code, blob_id, stub_data);
}
EntryFrame *EntryFrame::build(const intptr_t* parameters,
diff --git a/src/hotspot/cpu/zero/stubRoutines_zero.cpp b/src/hotspot/cpu/zero/stubRoutines_zero.cpp
index 9b53f09be5d..196907b061f 100644
--- a/src/hotspot/cpu/zero/stubRoutines_zero.cpp
+++ b/src/hotspot/cpu/zero/stubRoutines_zero.cpp
@@ -30,3 +30,9 @@
address StubRoutines::crc_table_addr() { ShouldNotCallThis(); return nullptr; }
address StubRoutines::crc32c_table_addr() { ShouldNotCallThis(); return nullptr; }
+
+#if INCLUDE_CDS
+// nothing to do for zero
+void StubRoutines::init_AOTAddressTable() {
+}
+#endif // INCLUDE_CDS
diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
index 28c2364315e..1b20761f6e4 100644
--- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
@@ -368,12 +368,15 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
goto unlock_unwind_and_return;
void **arguments;
- void *mirror; {
+ // These locals must remain on stack until call completes
+ void *mirror;
+ void *env;
+ {
arguments =
(void **) stack->alloc(handler->argument_count() * sizeof(void **));
void **dst = arguments;
- void *env = thread->jni_environment();
+ env = thread->jni_environment();
*(dst++) = &env;
if (method->is_static()) {
diff --git a/src/hotspot/os/aix/decoder_aix.hpp b/src/hotspot/os/aix/decoder_aix.hpp
index 2ba3e1c5a3a..632355ccf4e 100644
--- a/src/hotspot/os/aix/decoder_aix.hpp
+++ b/src/hotspot/os/aix/decoder_aix.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -38,7 +38,7 @@ class AIXDecoder: public AbstractDecoder {
virtual bool demangle(const char* symbol, char* buf, int buflen) { return false; } // use AixSymbols::get_function_name to demangle
virtual bool decode(address addr, char* buf, int buflen, int* offset, const char* modulepath, bool demangle) {
- return AixSymbols::get_function_name(addr, buf, buflen, offset, 0, demangle);
+ return AixSymbols::get_function_name(addr, buf, buflen, offset, nullptr, demangle);
}
virtual bool decode(address addr, char *buf, int buflen, int* offset, const void *base) {
ShouldNotReachHere();
diff --git a/src/hotspot/os/aix/globals_aix.hpp b/src/hotspot/os/aix/globals_aix.hpp
index 14b956235e8..adc189666ef 100644
--- a/src/hotspot/os/aix/globals_aix.hpp
+++ b/src/hotspot/os/aix/globals_aix.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,16 +37,6 @@
range, \
constraint) \
\
- /* Whether to allow the VM to run if EXTSHM=ON. EXTSHM is an environment */ \
- /* variable used on AIX to activate certain hacks which allow more shm segments */\
- /* for 32bit processes. For 64bit processes, it is pointless and may have */ \
- /* harmful side effects (e.g. for some reasonn prevents allocation of 64k pages */\
- /* via shmctl). */ \
- /* Per default we quit with an error if that variable is found; for certain */ \
- /* customer scenarios, we may want to be able to run despite that variable. */ \
- product(bool, AllowExtshm, false, DIAGNOSTIC, \
- "Allow VM to run with EXTSHM=ON.") \
- \
/* Maximum expected size of the data segment. That correlates with the */ \
/* maximum C Heap consumption we expect. */ \
/* We need to leave "breathing space" for the data segment when */ \
@@ -61,10 +51,6 @@
product(bool, OptimizePollingPageLocation, true, DIAGNOSTIC, \
"Optimize the location of the polling page used for Safepoints") \
\
- /* Use 64K pages for virtual memory (shmat). */ \
- product(bool, Use64KPages, true, DIAGNOSTIC, \
- "Use 64K pages if available.") \
- \
/* Normally AIX commits memory on touch, but sometimes it is helpful to have */ \
/* explicit commit behaviour. This flag, if true, causes the VM to touch */ \
/* memory on os::commit_memory() (which normally is a noop). */ \
@@ -79,7 +65,6 @@
//
// UseLargePages means nothing, for now, on AIX.
-// Use Use64KPages or Use16MPages instead.
define_pd_global(size_t, PreTouchParallelChunkSize, 1 * G);
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
diff --git a/src/hotspot/os/aix/libodm_aix.cpp b/src/hotspot/os/aix/libodm_aix.cpp
index 38e8067181a..57eee47c098 100644
--- a/src/hotspot/os/aix/libodm_aix.cpp
+++ b/src/hotspot/os/aix/libodm_aix.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, 2019 SAP SE. All rights reserved.
+ * Copyright (c) 2015, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -63,13 +63,12 @@ dynamicOdm::~dynamicOdm() {
void odmWrapper::clean_data() { if (_data) { permit_forbidden_function::free(_data); _data = nullptr; } }
-int odmWrapper::class_offset(const char *field, bool is_aix_5)
+int odmWrapper::class_offset(const char *field)
{
assert(has_class(), "initialization");
for (int i = 0; i < odm_class()->nelem; i++) {
if (strcmp(odm_class()->elem[i].elemname, field) == 0) {
int offset = odm_class()->elem[i].offset;
- if (is_aix_5) { offset += LINK_VAL_OFFSET; }
return offset;
}
}
@@ -88,11 +87,10 @@ void odmWrapper::determine_os_kernel_version(uint32_t* p_ver) {
return;
}
int voff, roff, moff, foff;
- bool is_aix_5 = (major_aix_version == 5);
- voff = odm.class_offset("ver", is_aix_5);
- roff = odm.class_offset("rel", is_aix_5);
- moff = odm.class_offset("mod", is_aix_5);
- foff = odm.class_offset("fix", is_aix_5);
+ voff = odm.class_offset("ver");
+ roff = odm.class_offset("rel");
+ moff = odm.class_offset("mod");
+ foff = odm.class_offset("fix");
if (voff == -1 || roff == -1 || moff == -1 || foff == -1) {
trcVerbose("try_determine_os_kernel_version: could not get offsets");
return;
diff --git a/src/hotspot/os/aix/libodm_aix.hpp b/src/hotspot/os/aix/libodm_aix.hpp
index 924ccaf8c51..11e67a4f5ae 100644
--- a/src/hotspot/os/aix/libodm_aix.hpp
+++ b/src/hotspot/os/aix/libodm_aix.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, 2024 SAP SE. All rights reserved.
+ * Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -82,7 +82,7 @@ class odmWrapper : private dynamicOdm {
CLASS_SYMBOL odm_class() { return _odm_class; }
bool has_class() { return odm_class() != (CLASS_SYMBOL)-1; }
- int class_offset(const char *field, bool is_aix_5);
+ int class_offset(const char *field);
char* data() { return _data; }
char* retrieve_obj(const char* name = nullptr) {
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index d7c1911a914..3cad24d388c 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -122,17 +122,10 @@
extern "C"
int mread_real_time(timebasestruct_t *t, size_t size_of_timebasestruct_t);
-#if !defined(_AIXVERSION_610)
-extern "C" int getthrds64(pid_t, struct thrdentry64*, int, tid64_t*, int);
-extern "C" int getprocs64(procentry64*, int, fdsinfo*, int, pid_t*, int);
-extern "C" int getargs(procsinfo*, int, char*, int);
-#endif
-
#define MAX_PATH (2 * K)
// for multipage initialization error analysis (in 'g_multipage_error')
#define ERROR_MP_OS_TOO_OLD 100
-#define ERROR_MP_EXTSHM_ACTIVE 101
#define ERROR_MP_VMGETINFO_FAILED 102
#define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103
@@ -184,9 +177,6 @@ uint32_t os::Aix::_os_version = 0;
// -1 = uninitialized, 0 - no, 1 - yes
int os::Aix::_xpg_sus_mode = -1;
-// -1 = uninitialized, 0 - no, 1 - yes
-int os::Aix::_extshm = -1;
-
////////////////////////////////////////////////////////////////////////////////
// local variables
@@ -216,7 +206,7 @@ static address g_brk_at_startup = nullptr;
// shmctl(). Different shared memory regions can have different page
// sizes.
//
-// More information can be found at AIBM info center:
+// More information can be found at IBM info center:
// http://publib.boulder.ibm.com/infocenter/aix/v6r1/index.jsp?topic=/com.ibm.aix.prftungd/doc/prftungd/multiple_page_size_app_support.htm
//
static struct {
@@ -703,7 +693,7 @@ static void *thread_native_entry(Thread *thread) {
log_info(os, thread)("Thread finished (tid: %zu, kernel thread id: %zu).",
os::current_thread_id(), (uintx) kernel_thread_id);
- return 0;
+ return nullptr;
}
bool os::create_thread(Thread* thread, ThreadType thr_type,
@@ -1201,13 +1191,6 @@ void os::print_memory_info(outputStream* st) {
const char* const ldr_cntrl = ::getenv("LDR_CNTRL");
st->print_cr(" LDR_CNTRL=%s.", ldr_cntrl ? ldr_cntrl : "");
- // Print out EXTSHM because it is an unsupported setting.
- const char* const extshm = ::getenv("EXTSHM");
- st->print_cr(" EXTSHM=%s.", extshm ? extshm : "");
- if ( (strcmp(extshm, "on") == 0) || (strcmp(extshm, "ON") == 0) ) {
- st->print_cr(" *** Unsupported! Please remove EXTSHM from your environment! ***");
- }
-
// Print out AIXTHREAD_GUARDPAGES because it affects the size of pthread stacks.
const char* const aixthread_guardpages = ::getenv("AIXTHREAD_GUARDPAGES");
st->print_cr(" AIXTHREAD_GUARDPAGES=%s.",
@@ -1753,10 +1736,9 @@ bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
return true;
}
-bool os::remove_stack_guard_pages(char* addr, size_t size) {
+void os::remove_stack_guard_pages(char* addr, size_t size) {
// Do not call this; no need to commit stack pages on AIX.
ShouldNotReachHere();
- return true;
}
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
@@ -1957,11 +1939,6 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
return nullptr;
}
-bool os::pd_release_memory_special(char* base, size_t bytes) {
- fatal("os::release_memory_special should not be called on AIX.");
- return false;
-}
-
size_t os::large_page_size() {
return _large_page_size;
}
@@ -2143,46 +2120,10 @@ void os::init(void) {
// 64k no --- AIX 5.2 ? ---
// 64k yes 64k new systems and standard java loader (we set datapsize=64k when linking)
- // We explicitly leave no option to change page size, because only upgrading would work,
- // not downgrading (if stack page size is 64k you cannot pretend its 4k).
-
- if (g_multipage_support.datapsize == 4*K) {
- // datapsize = 4K. Data segment, thread stacks are 4K paged.
- if (g_multipage_support.can_use_64K_pages || g_multipage_support.can_use_64K_mmap_pages) {
- // .. but we are able to use 64K pages dynamically.
- // This would be typical for java launchers which are not linked
- // with datapsize=64K (like, any other launcher but our own).
- //
- // In this case it would be smart to allocate the java heap with 64K
- // to get the performance benefit, and to fake 64k pages for the
- // data segment (when dealing with thread stacks).
- //
- // However, leave a possibility to downgrade to 4K, using
- // -XX:-Use64KPages.
- if (Use64KPages) {
- trcVerbose("64K page mode (faked for data segment)");
- set_page_size(64*K);
- } else {
- trcVerbose("4K page mode (Use64KPages=off)");
- set_page_size(4*K);
- }
- } else {
- // .. and not able to allocate 64k pages dynamically. Here, just
- // fall back to 4K paged mode and use mmap for everything.
- trcVerbose("4K page mode");
- set_page_size(4*K);
- FLAG_SET_ERGO(Use64KPages, false);
- }
- } else {
- // datapsize = 64k. Data segment, thread stacks are 64k paged.
- // This normally means that we can allocate 64k pages dynamically.
- // (There is one special case where this may be false: EXTSHM=on.
- // but we decided to not support that mode).
- assert0(g_multipage_support.can_use_64K_pages || g_multipage_support.can_use_64K_mmap_pages);
- set_page_size(64*K);
- trcVerbose("64K page mode");
- FLAG_SET_ERGO(Use64KPages, true);
- }
+ // datapsize = 64k. Data segment, thread stacks are 64k paged.
+ // This normally means that we can allocate 64k pages dynamically.
+ assert0(g_multipage_support.can_use_64K_pages || g_multipage_support.can_use_64K_mmap_pages);
+ set_page_size(64*K);
// For now UseLargePages is just ignored.
FLAG_SET_ERGO(UseLargePages, false);
@@ -2565,23 +2506,18 @@ void os::Aix::initialize_os_info() {
assert(minor > 0, "invalid OS release");
_os_version = (major << 24) | (minor << 16);
char ver_str[20] = {0};
- const char* name_str = "unknown OS";
- if (strcmp(uts.sysname, "AIX") == 0) {
- // We run on AIX. We do not support versions older than AIX 7.1.
- // Determine detailed AIX version: Version, Release, Modification, Fix Level.
- odmWrapper::determine_os_kernel_version(&_os_version);
- if (os_version_short() < 0x0701) {
- log_warning(os)("AIX releases older than AIX 7.1 are not supported.");
- assert(false, "AIX release too old.");
- }
- name_str = "AIX";
- jio_snprintf(ver_str, sizeof(ver_str), "%u.%u.%u.%u",
- major, minor, (_os_version >> 8) & 0xFF, _os_version & 0xFF);
- } else {
- assert(false, "%s", name_str);
+ // We do not support versions older than AIX 7.2 TL 5.
+ // Determine detailed AIX version: Version, Release, Modification, Fix Level.
+ odmWrapper::determine_os_kernel_version(&_os_version);
+ if (_os_version < 0x07020500) {
+ log_warning(os)("AIX releases older than AIX 7.2 TL 5 are not supported.");
+ assert(false, "AIX release too old.");
}
- log_info(os)("We run on %s %s", name_str, ver_str);
+
+ jio_snprintf(ver_str, sizeof(ver_str), "%u.%u.%u.%u",
+ major, minor, (_os_version >> 8) & 0xFF, _os_version & 0xFF);
+ log_info(os)("We run on AIX %s", ver_str);
}
guarantee(_os_version, "Could not determine AIX release");
@@ -2594,28 +2530,13 @@ void os::Aix::initialize_os_info() {
void os::Aix::scan_environment() {
char* p;
- int rc;
- // Warn explicitly if EXTSHM=ON is used. That switch changes how
- // System V shared memory behaves. One effect is that page size of
- // shared memory cannot be change dynamically, effectivly preventing
- // large pages from working.
- // This switch was needed on AIX 32bit, but on AIX 64bit the general
- // recommendation is (in OSS notes) to switch it off.
+ // Reject EXTSHM=ON. That switch changes how System V shared memory behaves
+ // and prevents allocation of 64k pages for the heap.
p = ::getenv("EXTSHM");
trcVerbose("EXTSHM=%s.", p ? p : "");
if (p && strcasecmp(p, "ON") == 0) {
- _extshm = 1;
- log_warning(os)("*** Unsupported mode! Please remove EXTSHM from your environment! ***");
- if (!AllowExtshm) {
- // We allow under certain conditions the user to continue. However, we want this
- // to be a fatal error by default. On certain AIX systems, leaving EXTSHM=ON means
- // that the VM is not able to allocate 64k pages for the heap.
- // We do not want to run with reduced performance.
- vm_exit_during_initialization("EXTSHM is ON. Please remove EXTSHM from your environment.");
- }
- } else {
- _extshm = 0;
+ vm_exit_during_initialization("EXTSHM is ON. Please remove EXTSHM from your environment.");
}
// SPEC1170 behaviour: will change the behaviour of a number of POSIX APIs.
@@ -2746,3 +2667,7 @@ void os::print_memory_mappings(char* addr, size_t bytes, outputStream* st) {}
void os::jfr_report_memory_info() {}
#endif // INCLUDE_JFR
+
+void os::print_open_file_descriptors(outputStream* st) {
+ // File descriptor counting is not implemented on AIX.
+}
diff --git a/src/hotspot/os/aix/os_aix.hpp b/src/hotspot/os/aix/os_aix.hpp
index a30e2077fc2..e21d2cf81bb 100644
--- a/src/hotspot/os/aix/os_aix.hpp
+++ b/src/hotspot/os/aix/os_aix.hpp
@@ -49,11 +49,6 @@ class os::Aix {
// 1 - SPEC1170 requested (XPG_SUS_ENV is ON)
static int _xpg_sus_mode;
- // -1 = uninitialized,
- // 0 - EXTSHM=OFF or not set
- // 1 - EXTSHM=ON
- static int _extshm;
-
static bool available_memory(physical_memory_size_type& value);
static bool free_memory(physical_memory_size_type& value);
static physical_memory_size_type physical_memory() { return _physical_memory; }
@@ -111,12 +106,6 @@ class os::Aix {
return _xpg_sus_mode;
}
- // Returns true if EXTSHM=ON.
- static bool extshm() {
- assert(_extshm != -1, "not initialized");
- return _extshm;
- }
-
// result struct for get_meminfo()
struct meminfo_t {
diff --git a/src/hotspot/os/aix/os_perf_aix.cpp b/src/hotspot/os/aix/os_perf_aix.cpp
index aa8819d035f..cbf78083483 100644
--- a/src/hotspot/os/aix/os_perf_aix.cpp
+++ b/src/hotspot/os/aix/os_perf_aix.cpp
@@ -143,12 +143,6 @@ static OSReturn get_jvm_load(double* jvm_uload, double* jvm_sload) {
return OS_OK;
}
-static void update_prev_time(jvm_time_store_t* from, jvm_time_store_t* to) {
- if (from && to) {
- memcpy(to, from, sizeof(jvm_time_store_t));
- }
-}
-
static void update_prev_ticks(cpu_tick_store_t* from, cpu_tick_store_t* to) {
if (from && to) {
memcpy(to, from, sizeof(cpu_tick_store_t));
diff --git a/src/hotspot/os/aix/porting_aix.cpp b/src/hotspot/os/aix/porting_aix.cpp
index 7311afc197b..f0527136d90 100644
--- a/src/hotspot/os/aix/porting_aix.cpp
+++ b/src/hotspot/os/aix/porting_aix.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -78,7 +78,7 @@ class fixed_strings {
public:
- fixed_strings() : first(0) {}
+ fixed_strings() : first(nullptr) {}
~fixed_strings() {
node* n = first;
while (n) {
@@ -113,7 +113,7 @@ bool AixSymbols::get_function_name (
// information (null if not available)
bool demangle // [in] whether to demangle the name
) {
- struct tbtable* tb = 0;
+ struct tbtable* tb = nullptr;
unsigned int searchcount = 0;
// initialize output parameters
@@ -426,6 +426,10 @@ int dladdr(void* addr, Dl_info* info) {
}
+int JVM_dladdr(void* addr, Dl_info* info) {
+ return dladdr(addr, info);
+}
+
/////////////////////////////////////////////////////////////////////////////
// Native callstack dumping
@@ -653,10 +657,10 @@ void AixNativeCallstack::print_callstack_for_context(outputStream* st, const uco
// To print the first frame, use the current value of iar:
// current entry indicated by iar (the current pc)
- codeptr_t cur_iar = 0;
- stackptr_t cur_sp = 0;
- codeptr_t cur_rtoc = 0;
- codeptr_t cur_lr = 0;
+ codeptr_t cur_iar = nullptr;
+ stackptr_t cur_sp = nullptr;
+ codeptr_t cur_rtoc = nullptr;
+ codeptr_t cur_lr = nullptr;
const ucontext_t* uc = (const ucontext_t*) context;
@@ -926,7 +930,7 @@ static struct handletableentry* p_handletable = nullptr;
static const char* rtv_linkedin_libpath() {
constexpr int bufsize = 4096;
static char buffer[bufsize];
- static const char* libpath = 0;
+ static const char* libpath = nullptr;
// we only try to retrieve the libpath once. After that try we
// let libpath point to buffer, which then contains a valid libpath
diff --git a/src/hotspot/os/aix/porting_aix.hpp b/src/hotspot/os/aix/porting_aix.hpp
index a1a22d81471..0bd71079d0a 100644
--- a/src/hotspot/os/aix/porting_aix.hpp
+++ b/src/hotspot/os/aix/porting_aix.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2024 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,25 +37,9 @@
// (see http://linux.die.net/man/3/dladdr)
// dladdr(3) is not POSIX but a GNU extension, and is not available on AIX.
//
-// Differences between AIX dladdr and Linux dladdr:
-//
-// 1) Dl_info.dli_fbase: can never work, is disabled.
-// A loaded image on AIX is divided in multiple segments, at least two
-// (text and data) but potentially also far more. This is because the loader may
-// load each member into an own segment, as for instance happens with the libC.a
-// 2) Dl_info.dli_sname: This only works for code symbols (functions); for data, a
-// zero-length string is returned ("").
-// 3) Dl_info.dli_saddr: For code, this will return the entry point of the function,
-// not the function descriptor.
-typedef struct {
- const char *dli_fname; // file path of loaded library
- // void *dli_fbase;
- const char *dli_sname; // symbol name; "" if not known
- void *dli_saddr; // address of *entry* of function; not function descriptor;
-} Dl_info;
+#include "dl_info.h"
-// Note: we export this to use it inside J2se too
#ifdef __cplusplus
extern "C"
#endif
diff --git a/src/hotspot/os/bsd/globals_bsd.hpp b/src/hotspot/os/bsd/globals_bsd.hpp
index 850d491a11f..22f587ed789 100644
--- a/src/hotspot/os/bsd/globals_bsd.hpp
+++ b/src/hotspot/os/bsd/globals_bsd.hpp
@@ -28,6 +28,7 @@
//
// Declare Bsd specific flags. They are not available on other platforms.
//
+#ifdef AARCH64
#define RUNTIME_OS_FLAGS(develop, \
develop_pd, \
product, \
@@ -35,9 +36,21 @@
range, \
constraint) \
\
- AARCH64_ONLY(develop(bool, AssertWXAtThreadSync, true, \
- "Conservatively check W^X thread state at possible safepoint" \
- "or handshake"))
+ develop(bool, TraceWXHealing, false, \
+ "track occurrences of W^X mode healing") \
+ develop(bool, UseOldWX, false, \
+ "Choose old W^X implementation.") \
+ product(bool, StressWXHealing, false, DIAGNOSTIC, \
+ "Stress W xor X healing on MacOS")
+
+#else
+#define RUNTIME_OS_FLAGS(develop, \
+ develop_pd, \
+ product, \
+ product_pd, \
+ range, \
+ constraint)
+#endif
// end of RUNTIME_OS_FLAGS
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index 0e21c2d1785..a4d9a2197a5 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -76,6 +76,7 @@
# include
# include
# include
+# include
# include
# include
# include
@@ -102,6 +103,7 @@
#endif
#ifdef __APPLE__
+ #include
#include
#include
#endif
@@ -841,6 +843,7 @@ jlong os::javaTimeNanos() {
// We might also condition (c) on the magnitude of the delta between obsv and now.
// Avoiding excessive CAS operations to hot RW locations is critical.
// See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+ // https://web.archive.org/web/20131214182431/https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
return (prev == obsv) ? now : obsv;
}
@@ -1781,10 +1784,8 @@ bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
return os::commit_memory(addr, size, !ExecMem);
}
-// If this is a growable mapping, remove the guard pages entirely by
-// munmap()ping them. If not, just call uncommit_memory().
-bool os::remove_stack_guard_pages(char* addr, size_t size) {
- return os::uncommit_memory(addr, size);
+void os::remove_stack_guard_pages(char* addr, size_t size) {
+ os::uncommit_memory(addr, size);
}
// 'requested_addr' is only treated as a hint, the return value may or
@@ -1886,11 +1887,6 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
return nullptr;
}
-bool os::pd_release_memory_special(char* base, size_t bytes) {
- fatal("os::release_memory_special should not be called on BSD.");
- return false;
-}
-
size_t os::large_page_size() {
return _large_page_size;
}
@@ -2602,3 +2598,45 @@ bool os::pd_dll_unload(void* libhandle, char* ebuf, int ebuflen) {
return res;
} // end: os::pd_dll_unload()
+
+void os::print_open_file_descriptors(outputStream* st) {
+#ifdef __APPLE__
+ char buf[1024 * sizeof(struct proc_fdinfo)];
+ os::Bsd::print_open_file_descriptors(st, buf, sizeof(buf));
+#else
+ st->print_cr("Open File Descriptors: unknown");
+#endif
+}
+
+void os::Bsd::print_open_file_descriptors(outputStream* st, char* buf, size_t buflen) {
+#ifdef __APPLE__
+ pid_t my_pid;
+
+ // ensure the scratch buffer is big enough for at least one FD info struct
+ precond(buflen >= sizeof(struct proc_fdinfo));
+ kern_return_t kres = pid_for_task(mach_task_self(), &my_pid);
+ if (kres != KERN_SUCCESS) {
+ st->print_cr("Open File Descriptors: unknown");
+ return;
+ }
+ size_t max_fds = buflen / sizeof(struct proc_fdinfo);
+ struct proc_fdinfo* fds = reinterpret_cast(buf);
+
+ // fill our buffer with FD info, up to the available buffer size
+ int res = proc_pidinfo(my_pid, PROC_PIDLISTFDS, 0, fds, max_fds * sizeof(struct proc_fdinfo));
+ if (res <= 0) {
+ st->print_cr("Open File Descriptors: unknown");
+ return;
+ }
+
+ // print lower threshold if count exceeds buffer size
+ int nfiles = res / sizeof(struct proc_fdinfo);
+ if ((size_t)nfiles >= max_fds) {
+ st->print_cr("Open File Descriptors: > %zu", max_fds);
+ return;
+ }
+ st->print_cr("Open File Descriptors: %d", nfiles);
+#else
+ st->print_cr("Open File Descriptors: unknown");
+#endif
+}
diff --git a/src/hotspot/os/bsd/os_bsd.hpp b/src/hotspot/os/bsd/os_bsd.hpp
index da73211b9a7..e87a680b2d2 100644
--- a/src/hotspot/os/bsd/os_bsd.hpp
+++ b/src/hotspot/os/bsd/os_bsd.hpp
@@ -123,6 +123,8 @@ class os::Bsd {
static int get_node_by_cpu(int cpu_id);
static void print_uptime_info(outputStream* st);
+ static void print_open_file_descriptors(outputStream* st, char* buf, size_t buflen);
+ static void print_open_file_descriptors(outputStream* st);
};
#endif // OS_BSD_OS_BSD_HPP
diff --git a/src/hotspot/os/bsd/semaphore_bsd.cpp b/src/hotspot/os/bsd/semaphore_bsd.cpp
index 827c955677e..c35712ff2da 100644
--- a/src/hotspot/os/bsd/semaphore_bsd.cpp
+++ b/src/hotspot/os/bsd/semaphore_bsd.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -81,27 +81,37 @@ bool OSXSemaphore::timedwait(int64_t millis) {
// kernel semaphores take a relative timeout
mach_timespec_t waitspec;
- int secs = millis / MILLIUNITS;
- int nsecs = millis_to_nanos(millis % MILLIUNITS);
- waitspec.tv_sec = secs;
- waitspec.tv_nsec = nsecs;
+ int64_t starttime;
+ const bool is_trywait = millis == 0;
- int64_t starttime = os::javaTimeNanos();
+ if (!is_trywait) {
+ int secs = millis / MILLIUNITS;
+ int nsecs = millis_to_nanos(millis % MILLIUNITS);
+ waitspec.tv_sec = secs;
+ waitspec.tv_nsec = nsecs;
+
+ starttime = os::javaTimeNanos();
+ } else {
+ waitspec.tv_sec = 0;
+ waitspec.tv_nsec = 0;
+ }
kr = semaphore_timedwait(_semaphore, waitspec);
while (kr == KERN_ABORTED) {
- // reduce the timeout and try again
- int64_t totalwait = millis_to_nanos(millis);
- int64_t current = os::javaTimeNanos();
- int64_t passedtime = current - starttime;
+ if (!is_trywait) {
+ // reduce the timeout and try again
+ int64_t totalwait = millis_to_nanos(millis);
+ int64_t current = os::javaTimeNanos();
+ int64_t passedtime = current - starttime;
- if (passedtime >= totalwait) {
- waitspec.tv_sec = 0;
- waitspec.tv_nsec = 0;
- } else {
- int64_t waittime = totalwait - (current - starttime);
- waitspec.tv_sec = waittime / NANOSECS_PER_SEC;
- waitspec.tv_nsec = waittime % NANOSECS_PER_SEC;
+ if (passedtime >= totalwait) {
+ waitspec.tv_sec = 0;
+ waitspec.tv_nsec = 0;
+ } else {
+ int64_t waittime = totalwait - (current - starttime);
+ waitspec.tv_sec = waittime / NANOSECS_PER_SEC;
+ waitspec.tv_nsec = waittime % NANOSECS_PER_SEC;
+ }
}
kr = semaphore_timedwait(_semaphore, waitspec);
diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
index e49d070890e..4a2d75ecdf3 100644
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
@@ -28,7 +28,6 @@
#include "cgroupV2Subsystem_linux.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
-#include "os_linux.hpp"
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"
@@ -41,6 +40,8 @@
// Inlined from for portability.
#ifndef CGROUP2_SUPER_MAGIC
# define CGROUP2_SUPER_MAGIC 0x63677270
+#else
+ STATIC_ASSERT(CGROUP2_SUPER_MAGIC == 0x63677270);
#endif
// controller names have to match the *_IDX indices
@@ -605,6 +606,11 @@ void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
}
}
+void CgroupSubsystem::adjust_controllers(physical_memory_size_type upper_mem_bound, int upper_cpu_bound) {
+ CgroupUtil::adjust_controller(memory_controller()->controller(), upper_mem_bound);
+ CgroupUtil::adjust_controller(cpu_controller()->controller(), upper_cpu_bound);
+}
+
/* active_processor_count
*
* Calculate an appropriate number of active processors for the
@@ -631,7 +637,7 @@ void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
* return:
* true if there were no errors. false otherwise.
*/
-bool CgroupSubsystem::active_processor_count(double& value) {
+bool CgroupSubsystem::active_processor_count(int (*cpu_bound_func)(), double& value) {
// We use a cache with a timeout to avoid performing expensive
// computations in the event this function is called frequently.
// [See 8227006].
@@ -643,7 +649,7 @@ bool CgroupSubsystem::active_processor_count(double& value) {
return true;
}
- int cpu_count = os::Linux::active_processor_count();
+ int cpu_count = cpu_bound_func();
double result = -1;
if (!CgroupUtil::processor_count(contrl->controller(), cpu_count, result)) {
return false;
diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp
index d083a9985c2..adde37e1c77 100644
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp
@@ -278,7 +278,7 @@ class CgroupMemoryController: public CHeapObj {
class CgroupSubsystem: public CHeapObj {
public:
bool memory_limit_in_bytes(physical_memory_size_type upper_bound, physical_memory_size_type& value);
- bool active_processor_count(double& value);
+ bool active_processor_count(int (*cpu_bound_func)(), double& value);
virtual bool pids_max(uint64_t& value) = 0;
virtual bool pids_current(uint64_t& value) = 0;
@@ -291,6 +291,8 @@ class CgroupSubsystem: public CHeapObj {
virtual CachingCgroupController* cpu_controller() = 0;
virtual CgroupCpuacctController* cpuacct_controller() = 0;
+ void adjust_controllers(physical_memory_size_type upper_mem_bound, int upper_cpu_bound);
+
bool cpu_quota(int& value);
bool cpu_period(int& value);
bool cpu_shares(int& value);
diff --git a/src/hotspot/os/linux/cgroupUtil_linux.cpp b/src/hotspot/os/linux/cgroupUtil_linux.cpp
index 570b335940b..f166f6cd5e4 100644
--- a/src/hotspot/os/linux/cgroupUtil_linux.cpp
+++ b/src/hotspot/os/linux/cgroupUtil_linux.cpp
@@ -24,7 +24,6 @@
*/
#include "cgroupUtil_linux.hpp"
-#include "os_linux.hpp"
bool CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int upper_bound, double& value) {
assert(upper_bound > 0, "upper bound of cpus must be positive");
@@ -82,7 +81,7 @@ double CgroupUtil::get_updated_cpu_limit(CgroupCpuController* cpu,
return lowest;
}
-void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
+void CgroupUtil::adjust_controller(CgroupMemoryController* mem, physical_memory_size_type upper_bound) {
assert(mem->cgroup_path() != nullptr, "invariant");
if (strstr(mem->cgroup_path(), "../") != nullptr) {
log_warning(os, container)("Cgroup memory controller path at '%s' seems to have moved "
@@ -100,17 +99,16 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
char* cg_path = os::strdup(orig);
char* last_slash;
assert(cg_path[0] == '/', "cgroup path must start with '/'");
- physical_memory_size_type phys_mem = os::Linux::physical_memory();
char* limit_cg_path = nullptr;
physical_memory_size_type limit = value_unlimited;
- physical_memory_size_type lowest_limit = phys_mem;
- lowest_limit = get_updated_mem_limit(mem, lowest_limit, phys_mem);
- physical_memory_size_type orig_limit = lowest_limit != phys_mem ? lowest_limit : phys_mem;
+ physical_memory_size_type lowest_limit = upper_bound;
+ lowest_limit = get_updated_mem_limit(mem, lowest_limit, upper_bound);
+ physical_memory_size_type orig_limit = lowest_limit != upper_bound ? lowest_limit : upper_bound;
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
*last_slash = '\0'; // strip path
// update to shortened path and try again
mem->set_subsystem_path(cg_path);
- limit = get_updated_mem_limit(mem, lowest_limit, phys_mem);
+ limit = get_updated_mem_limit(mem, lowest_limit, upper_bound);
if (limit < lowest_limit) {
lowest_limit = limit;
os::free(limit_cg_path); // handles nullptr
@@ -119,13 +117,13 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
}
// need to check limit at mount point
mem->set_subsystem_path("/");
- limit = get_updated_mem_limit(mem, lowest_limit, phys_mem);
+ limit = get_updated_mem_limit(mem, lowest_limit, upper_bound);
if (limit < lowest_limit) {
lowest_limit = limit;
os::free(limit_cg_path); // handles nullptr
limit_cg_path = os::strdup("/");
}
- assert(lowest_limit <= phys_mem, "limit must not exceed host memory");
+ assert(lowest_limit <= upper_bound, "limit must not exceed upper bound");
if (lowest_limit != orig_limit) {
// we've found a lower limit anywhere in the hierarchy,
// set the path to the limit path
@@ -147,7 +145,7 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
os::free(limit_cg_path);
}
-void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
+void CgroupUtil::adjust_controller(CgroupCpuController* cpu, int upper_bound) {
assert(cpu->cgroup_path() != nullptr, "invariant");
if (strstr(cpu->cgroup_path(), "../") != nullptr) {
log_warning(os, container)("Cgroup cpu controller path at '%s' seems to have moved "
@@ -165,17 +163,16 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
char* cg_path = os::strdup(orig);
char* last_slash;
assert(cg_path[0] == '/', "cgroup path must start with '/'");
- int host_cpus = os::Linux::active_processor_count();
- int lowest_limit = host_cpus;
- double cpus = get_updated_cpu_limit(cpu, lowest_limit, host_cpus);
- int orig_limit = lowest_limit != host_cpus ? lowest_limit : host_cpus;
+ int lowest_limit = upper_bound;
+ double cpus = get_updated_cpu_limit(cpu, lowest_limit, upper_bound);
+ int orig_limit = lowest_limit != upper_bound ? lowest_limit : upper_bound;
char* limit_cg_path = nullptr;
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
*last_slash = '\0'; // strip path
// update to shortened path and try again
cpu->set_subsystem_path(cg_path);
- cpus = get_updated_cpu_limit(cpu, lowest_limit, host_cpus);
- if (cpus != host_cpus && cpus < lowest_limit) {
+ cpus = get_updated_cpu_limit(cpu, lowest_limit, upper_bound);
+ if (cpus != upper_bound && cpus < lowest_limit) {
lowest_limit = cpus;
os::free(limit_cg_path); // handles nullptr
limit_cg_path = os::strdup(cg_path);
@@ -183,8 +180,8 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
}
// need to check limit at mount point
cpu->set_subsystem_path("/");
- cpus = get_updated_cpu_limit(cpu, lowest_limit, host_cpus);
- if (cpus != host_cpus && cpus < lowest_limit) {
+ cpus = get_updated_cpu_limit(cpu, lowest_limit, upper_bound);
+ if (cpus != upper_bound && cpus < lowest_limit) {
lowest_limit = cpus;
os::free(limit_cg_path); // handles nullptr
limit_cg_path = os::strdup(cg_path);
diff --git a/src/hotspot/os/linux/cgroupUtil_linux.hpp b/src/hotspot/os/linux/cgroupUtil_linux.hpp
index 1fd2a7d872b..68585c22c2d 100644
--- a/src/hotspot/os/linux/cgroupUtil_linux.hpp
+++ b/src/hotspot/os/linux/cgroupUtil_linux.hpp
@@ -35,10 +35,10 @@ class CgroupUtil: AllStatic {
static bool processor_count(CgroupCpuController* cpu, int upper_bound, double& value);
// Given a memory controller, adjust its path to a point in the hierarchy
// that represents the closest memory limit.
- static void adjust_controller(CgroupMemoryController* m);
+ static void adjust_controller(CgroupMemoryController* m, physical_memory_size_type upper_bound);
// Given a cpu controller, adjust its path to a point in the hierarchy
// that represents the closest cpu limit.
- static void adjust_controller(CgroupCpuController* c);
+ static void adjust_controller(CgroupCpuController* c, int upper_bound);
private:
static physical_memory_size_type get_updated_mem_limit(CgroupMemoryController* m,
physical_memory_size_type lowest,
diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
index c8f5a290c99..e42b7a13391 100644
--- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
@@ -326,8 +326,6 @@ CgroupV1Subsystem::CgroupV1Subsystem(CgroupV1Controller* cpuset,
_cpuset(cpuset),
_cpuacct(cpuacct),
_pids(pids) {
- CgroupUtil::adjust_controller(memory);
- CgroupUtil::adjust_controller(cpu);
_memory = new CachingCgroupController(memory);
_cpu = new CachingCgroupController(cpu);
}
diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
index 30e1affc646..edd80bb7427 100644
--- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
@@ -154,8 +154,6 @@ CgroupV2Subsystem::CgroupV2Subsystem(CgroupV2MemoryController * memory,
CgroupV2CpuacctController* cpuacct,
CgroupV2Controller unified) :
_unified(unified) {
- CgroupUtil::adjust_controller(memory);
- CgroupUtil::adjust_controller(cpu);
_memory = new CachingCgroupController(memory);
_cpu = new CachingCgroupController(cpu);
_cpuacct = cpuacct;
diff --git a/src/hotspot/os/linux/hugepages.cpp b/src/hotspot/os/linux/hugepages.cpp
index 5472c093d3f..b065f7b1496 100644
--- a/src/hotspot/os/linux/hugepages.cpp
+++ b/src/hotspot/os/linux/hugepages.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -35,11 +35,16 @@
#include
ExplicitHugePageSupport::ExplicitHugePageSupport() :
- _initialized(false), _pagesizes(), _default_hugepage_size(SIZE_MAX), _inconsistent(false) {}
+ _initialized{false}, _os_supported{}, _pre_allocated{}, _default_hugepage_size{0}, _inconsistent{false} {}
-os::PageSizes ExplicitHugePageSupport::pagesizes() const {
+os::PageSizes ExplicitHugePageSupport::os_supported() const {
assert(_initialized, "Not initialized");
- return _pagesizes;
+ return _os_supported;
+}
+
+os::PageSizes ExplicitHugePageSupport::pre_allocated() const {
+ assert(_initialized, "Not initialized");
+ return _pre_allocated;
}
size_t ExplicitHugePageSupport::default_hugepage_size() const {
@@ -63,7 +68,7 @@ static size_t scan_default_hugepagesize() {
// format has been changed), we'll set largest page size to 0
FILE *fp = os::fopen("/proc/meminfo", "r");
- if (fp) {
+ if (fp != nullptr) {
while (!feof(fp)) {
int x = 0;
char buf[16];
@@ -76,7 +81,7 @@ static size_t scan_default_hugepagesize() {
// skip to next line
for (;;) {
int ch = fgetc(fp);
- if (ch == EOF || ch == (int)'\n') break;
+ if (ch == EOF || ch == '\n') break;
}
}
}
@@ -129,10 +134,24 @@ static os::PageSizes scan_hugepages() {
return pagesizes;
}
+static os::PageSizes filter_pre_allocated_hugepages(os::PageSizes pagesizes) {
+ os::PageSizes pre_allocated{};
+ char filename[PATH_MAX];
+ for (size_t ps = pagesizes.smallest(); ps != 0; ps = pagesizes.next_larger(ps)) {
+ os::snprintf_checked(filename, sizeof(filename), "%s/hugepages-%zukB/nr_hugepages", sys_hugepages, ps / K);
+ size_t pages;
+ bool read_success = read_number_file(filename, &pages);
+ if (read_success && pages > 0) {
+ pre_allocated.add(ps);
+ }
+ }
+ return pre_allocated;
+}
+
void ExplicitHugePageSupport::print_on(outputStream* os) {
if (_initialized) {
os->print_cr("Explicit hugepage support:");
- for (size_t s = _pagesizes.smallest(); s != 0; s = _pagesizes.next_larger(s)) {
+ for (size_t s = _os_supported.smallest(); s != 0; s = _os_supported.next_larger(s)) {
os->print_cr(" hugepage size: " EXACTFMT, EXACTFMTARGS(s));
}
os->print_cr(" default hugepage size: " EXACTFMT, EXACTFMTARGS(_default_hugepage_size));
@@ -147,14 +166,15 @@ void ExplicitHugePageSupport::print_on(outputStream* os) {
void ExplicitHugePageSupport::scan_os() {
_default_hugepage_size = scan_default_hugepagesize();
if (_default_hugepage_size > 0) {
- _pagesizes = scan_hugepages();
+ _os_supported = scan_hugepages();
+ _pre_allocated = filter_pre_allocated_hugepages(_os_supported);
// See https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt: /proc/meminfo should match
// /sys/kernel/mm/hugepages/hugepages-xxxx. However, we may run on a broken kernel (e.g. on WSL)
// that only exposes /proc/meminfo but not /sys/kernel/mm/hugepages. In that case, we are not
// sure about the state of hugepage support by the kernel, so we won't use explicit hugepages.
- if (!_pagesizes.contains(_default_hugepage_size)) {
+ if (!_os_supported.contains(_default_hugepage_size)) {
log_info(pagesize)("Unexpected configuration: default pagesize (%zu) "
- "has no associated directory in /sys/kernel/mm/hugepages..", _default_hugepage_size);
+ "has no associated directory in /sys/kernel/mm/hugepages.", _default_hugepage_size);
_inconsistent = true;
}
}
@@ -167,7 +187,7 @@ void ExplicitHugePageSupport::scan_os() {
}
THPSupport::THPSupport() :
- _initialized(false), _mode(THPMode::never), _pagesize(SIZE_MAX) {}
+ _initialized{false}, _mode{THPMode::never}, _pagesize{0} {}
THPMode THPSupport::mode() const {
@@ -201,7 +221,6 @@ void THPSupport::scan_os() {
}
// Scan large page size for THP from hpage_pmd_size
- _pagesize = 0;
if (read_number_file("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", &_pagesize)) {
assert(_pagesize > 0, "Expected");
}
diff --git a/src/hotspot/os/linux/hugepages.hpp b/src/hotspot/os/linux/hugepages.hpp
index efd27c55fd6..5a9767b4ff8 100644
--- a/src/hotspot/os/linux/hugepages.hpp
+++ b/src/hotspot/os/linux/hugepages.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2024, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -45,7 +45,10 @@ class ExplicitHugePageSupport {
// All supported hugepage sizes (sizes for which entries exist
// in /sys/kernel/mm/hugepages/hugepage-xxx)
- os::PageSizes _pagesizes;
+ os::PageSizes _os_supported;
+
+ // Subset of _os_supported restricted to sizes whose nr_hugepages file contained a value larger than zero
+ os::PageSizes _pre_allocated;
// Contains the default hugepage. The "default hugepage size" is the one that
// - is marked in /proc/meminfo as "Hugepagesize"
@@ -60,7 +63,8 @@ public:
void scan_os();
- os::PageSizes pagesizes() const;
+ os::PageSizes os_supported() const;
+ os::PageSizes pre_allocated() const;
size_t default_hugepage_size() const;
void print_on(outputStream* os);
diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp
index b46263efd99..da2cbf381e6 100644
--- a/src/hotspot/os/linux/osContainer_linux.cpp
+++ b/src/hotspot/os/linux/osContainer_linux.cpp
@@ -59,6 +59,11 @@ void OSContainer::init() {
if (cgroup_subsystem == nullptr) {
return; // Required subsystem files not found or other error
}
+ // Adjust controller paths once subsystem is initialized
+ physical_memory_size_type phys_mem = os::Linux::physical_memory();
+ int host_cpus = os::Linux::active_processor_count();
+ cgroup_subsystem->adjust_controllers(phys_mem, host_cpus);
+
/*
* In order to avoid a false positive on is_containerized() on
* Linux systems outside a container *and* to ensure compatibility
@@ -252,7 +257,7 @@ char * OSContainer::cpu_cpuset_memory_nodes() {
bool OSContainer::active_processor_count(double& value) {
assert(cgroup_subsystem != nullptr, "cgroup subsystem not available");
- return cgroup_subsystem->active_processor_count(value);
+ return cgroup_subsystem->active_processor_count(&os::Linux::active_processor_count, value);
}
bool OSContainer::cpu_quota(int& value) {
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 7190845a8ba..a87c0ab33fa 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -83,6 +83,7 @@
#endif
# include
+# include
# include
# include
# include
@@ -113,6 +114,7 @@
# include
# include
# include
+# include
# include
#ifdef __GLIBC__
# include
@@ -1311,7 +1313,7 @@ bool os::is_primordial_thread(void) {
// Find the virtual memory area that contains addr
static bool find_vma(address addr, address* vma_low, address* vma_high) {
FILE *fp = os::fopen("/proc/self/maps", "r");
- if (fp) {
+ if (fp != nullptr) {
address low, high;
while (!feof(fp)) {
if (fscanf(fp, "%p-%p", &low, &high) == 2) {
@@ -1324,7 +1326,7 @@ static bool find_vma(address addr, address* vma_low, address* vma_high) {
}
for (;;) {
int ch = fgetc(fp);
- if (ch == EOF || ch == (int)'\n') break;
+ if (ch == EOF || ch == '\n') break;
}
}
fclose(fp);
@@ -3523,6 +3525,9 @@ bool os::pd_uncommit_memory(char* addr, size_t size, bool exec) {
log_trace(os, map)("mmap failed: " RANGEFMT " errno=(%s)",
RANGEFMTARGS(addr, size),
os::strerror(ep.saved_errno()));
+ if (ep.saved_errno() == ENOMEM) {
+ fatal("Failed to uncommit " RANGEFMT ". It is possible that the process's maximum number of mappings would have been exceeded. Try increasing the limit.", RANGEFMTARGS(addr, size));
+ }
return false;
}
return true;
@@ -3633,14 +3638,16 @@ bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
// It's safe to always unmap guard pages for primordial thread because we
// always place it right after end of the mapped region.
-bool os::remove_stack_guard_pages(char* addr, size_t size) {
- uintptr_t stack_extent, stack_base;
+void os::remove_stack_guard_pages(char* addr, size_t size) {
if (os::is_primordial_thread()) {
- return ::munmap(addr, size) == 0;
+ if (::munmap(addr, size) != 0) {
+ fatal("Failed to munmap " RANGEFMT, RANGEFMTARGS(addr, size));
+ }
+ return;
}
- return os::uncommit_memory(addr, size);
+ os::uncommit_memory(addr, size);
}
// 'requested_addr' is only treated as a hint, the return value may or
@@ -3809,8 +3816,8 @@ static int hugetlbfs_page_size_flag(size_t page_size) {
}
static bool hugetlbfs_sanity_check(size_t page_size) {
- const os::PageSizes page_sizes = HugePages::explicit_hugepage_info().pagesizes();
- assert(page_sizes.contains(page_size), "Invalid page sizes passed");
+ const os::PageSizes os_supported = HugePages::explicit_hugepage_info().os_supported();
+ assert(os_supported.contains(page_size), "Invalid page sizes passed (%zu)", page_size);
// Include the page size flag to ensure we sanity check the correct page size.
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB | hugetlbfs_page_size_flag(page_size);
@@ -3824,16 +3831,16 @@ static bool hugetlbfs_sanity_check(size_t page_size) {
log_info(pagesize)("Large page size (" EXACTFMT ") failed sanity check, "
"checking if smaller large page sizes are usable",
EXACTFMTARGS(page_size));
- for (size_t page_size_ = page_sizes.next_smaller(page_size);
- page_size_ > os::vm_page_size();
- page_size_ = page_sizes.next_smaller(page_size_)) {
- flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB | hugetlbfs_page_size_flag(page_size_);
- p = mmap(nullptr, page_size_, PROT_READ|PROT_WRITE, flags, -1, 0);
+ for (size_t size = os_supported.next_smaller(page_size);
+ size > os::vm_page_size();
+ size = os_supported.next_smaller(size)) {
+ flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB | hugetlbfs_page_size_flag(size);
+ p = mmap(nullptr, size, PROT_READ|PROT_WRITE, flags, -1, 0);
if (p != MAP_FAILED) {
// Mapping succeeded, sanity check passed.
- munmap(p, page_size_);
+ munmap(p, size);
log_info(pagesize)("Large page size (" EXACTFMT ") passed sanity check",
- EXACTFMTARGS(page_size_));
+ EXACTFMTARGS(size));
return true;
}
}
@@ -4015,7 +4022,7 @@ void os::Linux::large_page_init() {
// - os::large_page_size() is the default explicit hugepage size (/proc/meminfo "Hugepagesize")
// - os::pagesizes() contains all hugepage sizes the kernel supports, regardless whether there
// are pages configured in the pool or not (from /sys/kernel/hugepages/hugepage-xxxx ...)
- os::PageSizes all_large_pages = HugePages::explicit_hugepage_info().pagesizes();
+ os::PageSizes all_large_pages = HugePages::explicit_hugepage_info().os_supported();
const size_t default_large_page_size = HugePages::default_explicit_hugepage_size();
// 3) Consistency check and post-processing
@@ -4057,10 +4064,10 @@ void os::Linux::large_page_init() {
_large_page_size = large_page_size;
- // Populate _page_sizes with large page sizes less than or equal to
- // _large_page_size.
- for (size_t page_size = _large_page_size; page_size != 0;
- page_size = all_large_pages.next_smaller(page_size)) {
+ // Populate _page_sizes with _large_page_size (default large page size) even if not pre-allocated.
+ // Then, populate _page_sizes with all smaller large page sizes that have been pre-allocated.
+ os::PageSizes pre_allocated = HugePages::explicit_hugepage_info().pre_allocated();
+ for (size_t page_size = _large_page_size; page_size != 0; page_size = pre_allocated.next_smaller(page_size)) {
_page_sizes.add(page_size);
}
}
@@ -4124,12 +4131,12 @@ static char* reserve_memory_special_huge_tlbfs(size_t bytes,
size_t page_size,
char* req_addr,
bool exec) {
- const os::PageSizes page_sizes = HugePages::explicit_hugepage_info().pagesizes();
+ const os::PageSizes os_supported = HugePages::explicit_hugepage_info().os_supported();
assert(UseLargePages, "only for Huge TLBFS large pages");
assert(is_aligned(req_addr, alignment), "Must be");
assert(is_aligned(req_addr, page_size), "Must be");
assert(is_aligned(alignment, os::vm_allocation_granularity()), "Must be");
- assert(page_sizes.contains(page_size), "Must be a valid page size");
+ assert(os_supported.contains(page_size), "Must be a valid page size");
assert(page_size > os::vm_page_size(), "Must be a large page size");
assert(bytes >= page_size, "Shouldn't allocate large pages for small sizes");
@@ -4203,12 +4210,6 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
return addr;
}
-bool os::pd_release_memory_special(char* base, size_t bytes) {
- assert(UseLargePages, "only for large pages");
- // Plain munmap is sufficient
- return pd_release_memory(base, bytes);
-}
-
size_t os::large_page_size() {
return _large_page_size;
}
@@ -4381,7 +4382,7 @@ int os::Linux::get_namespace_pid(int vmid) {
os::snprintf_checked(fname, sizeof(fname), "/proc/%d/status", vmid);
FILE *fp = os::fopen(fname, "r");
- if (fp) {
+ if (fp != nullptr) {
int pid, nspid;
int ret;
while (!feof(fp) && !ferror(fp)) {
@@ -4395,7 +4396,7 @@ int os::Linux::get_namespace_pid(int vmid) {
}
for (;;) {
int ch = fgetc(fp);
- if (ch == EOF || ch == (int)'\n') break;
+ if (ch == EOF || ch == '\n') break;
}
}
fclose(fp);
@@ -4550,6 +4551,7 @@ void os::Linux::numa_init() {
FLAG_SET_ERGO_IF_DEFAULT(UseNUMAInterleaving, true);
}
+#if INCLUDE_PARALLELGC
if (UseParallelGC && UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
// With static large pages we cannot uncommit a page, so there's no way
// we can make the adaptive lgrp chunk resizing work. If the user specified both
@@ -4561,6 +4563,7 @@ void os::Linux::numa_init() {
UseAdaptiveNUMAChunkSizing = false;
}
}
+#endif
}
void os::Linux::disable_numa(const char* reason, bool warning) {
@@ -5428,3 +5431,31 @@ bool os::pd_dll_unload(void* libhandle, char* ebuf, int ebuflen) {
return res;
} // end: os::pd_dll_unload()
+
+void os::print_open_file_descriptors(outputStream* st) {
+ DIR* dirp = opendir("/proc/self/fd");
+ int fds = 0;
+ struct dirent* dentp;
+ const jlong TIMEOUT_NS = 50000000L; // 50 ms in nanoseconds
+ bool timed_out = false;
+
+ // limit proc file read to 50ms
+ jlong start = os::javaTimeNanos();
+ assert(dirp != nullptr, "No proc fs?");
+ while ((dentp = readdir(dirp)) != nullptr && !timed_out) {
+ if (isdigit(dentp->d_name[0])) fds++;
+ if (fds % 100 == 0) {
+ jlong now = os::javaTimeNanos();
+ if ((now - start) > TIMEOUT_NS) {
+ timed_out = true;
+ }
+ }
+ }
+
+ closedir(dirp);
+ if (timed_out) {
+ st->print_cr("Open File Descriptors: > %d", fds);
+ } else {
+ st->print_cr("Open File Descriptors: %d", fds);
+ }
+}
diff --git a/src/hotspot/os/posix/dtrace/hotspot_jni.d b/src/hotspot/os/posix/dtrace/hotspot_jni.d
index c5676921b37..1937769dcb2 100644
--- a/src/hotspot/os/posix/dtrace/hotspot_jni.d
+++ b/src/hotspot/os/posix/dtrace/hotspot_jni.d
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -366,6 +366,8 @@ provider hotspot_jni {
probe IsInstanceOf__return(uintptr_t);
probe IsSameObject__entry(void*, void*, void*);
probe IsSameObject__return(uintptr_t);
+ probe IsVirtualThread__entry(void*, void*);
+ probe IsVirtualThread__return(uintptr_t);
probe MonitorEnter__entry(void*, void*);
probe MonitorEnter__return(uint32_t);
probe MonitorExit__entry(void*, void*);
diff --git a/src/hotspot/os/posix/include/jvm_md.h b/src/hotspot/os/posix/include/jvm_md.h
index eb8e1f0d7e9..061ef17aaae 100644
--- a/src/hotspot/os/posix/include/jvm_md.h
+++ b/src/hotspot/os/posix/include/jvm_md.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -62,6 +62,19 @@
#define JVM_X_OK X_OK
#define JVM_F_OK F_OK
+#if defined(AIX)
+#include "jni_md.h"
+#include "dl_info.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+JNIEXPORT int JVM_dladdr(void* addr, Dl_info* info);
+#ifdef __cplusplus
+}
+#endif
+#endif
+
/*
* File I/O
*/
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index 5412e2bc92d..1fb2a248bec 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -888,6 +888,14 @@ void* os::lookup_function(const char* name) {
return dlsym(RTLD_DEFAULT, name);
}
+int64_t os::ftell(FILE* file) {
+ return ::ftell(file);
+}
+
+int os::fseek(FILE* file, int64_t offset, int whence) {
+ return ::fseek(file, offset, whence);
+}
+
jlong os::lseek(int fd, jlong offset, int whence) {
return (jlong) ::lseek(fd, offset, whence);
}
@@ -906,8 +914,25 @@ FILE* os::fdopen(int fd, const char* mode) {
ssize_t os::pd_write(int fd, const void *buf, size_t nBytes) {
ssize_t res;
+#ifdef __APPLE__
+ // macOS fails for individual write operations > 2GB.
+ // See https://gitlab.haskell.org/ghc/ghc/-/issues/17414
+ ssize_t total = 0;
+ while (nBytes > 0) {
+ size_t bytes_to_write = MIN2(nBytes, (size_t)INT_MAX);
+ RESTARTABLE(::write(fd, buf, bytes_to_write), res);
+ if (res == OS_ERR) {
+ return OS_ERR;
+ }
+ buf = (const char*)buf + res;
+ nBytes -= res;
+ total += res;
+ }
+ return total;
+#else
RESTARTABLE(::write(fd, buf, nBytes), res);
return res;
+#endif
}
ssize_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
diff --git a/src/hotspot/os/posix/perfMemory_posix.cpp b/src/hotspot/os/posix/perfMemory_posix.cpp
index ce9c2a4f031..c5046797e02 100644
--- a/src/hotspot/os/posix/perfMemory_posix.cpp
+++ b/src/hotspot/os/posix/perfMemory_posix.cpp
@@ -494,6 +494,7 @@ static char* get_user_name(uid_t uid) {
return user_name;
}
+#ifndef __APPLE__
// return the name of the user that owns the process identified by vmid.
//
// This method uses a slow directory search algorithm to find the backing
@@ -657,6 +658,7 @@ static char* get_user_name(int vmid, int *nspid, TRAPS) {
#endif
return result;
}
+#endif
// return the file name of the backing store file for the named
// shared memory region for the given user name and vmid.
@@ -699,6 +701,39 @@ static void remove_file(const char* path) {
}
}
+// Files newer than this threshold are considered to belong to a JVM that may
+// still be starting up and are therefore not candidates for stale-file
+// cleanup. This avoids racing a concurrent JVM startup while scanning the
+// hsperfdata directory.
+static const time_t cleanup_grace_period_seconds = 5;
+
+static bool is_cleanup_candidate(const char* filename, const char* dirname) {
+ struct stat statbuf;
+ int result;
+
+ RESTARTABLE(::lstat(filename, &statbuf), result);
+ if (result == OS_ERR) {
+ log_debug(perf, memops)("lstat failed for %s/%s: %s", dirname, filename, os::strerror(errno));
+ return false;
+ }
+
+ if (!S_ISREG(statbuf.st_mode)) {
+ return false;
+ }
+
+ const time_t now = time(nullptr);
+ if (now == (time_t)-1) {
+ return false;
+ }
+
+ if (statbuf.st_mtime >= now - cleanup_grace_period_seconds) {
+ log_debug(perf, memops)("Skip cleanup of fresh file %s/%s", dirname, filename);
+ return false;
+ }
+
+ return true;
+}
+
// cleanup stale shared memory files
//
// This method attempts to remove all stale shared memory files in
@@ -742,6 +777,11 @@ static void cleanup_sharedmem_files(const char* dirname) {
continue;
}
+ if (!is_cleanup_candidate(filename, dirname)) {
+ errno = 0;
+ continue;
+ }
+
#if defined(LINUX)
// Special case on Linux, if multiple containers share the
// same /tmp directory:
@@ -870,16 +910,56 @@ static int create_sharedmem_file(const char* dirname, const char* filename, size
return -1;
}
- // Open the filename in the current directory.
- // Cannot use O_TRUNC here; truncation of an existing file has to happen
- // after the is_file_secure() check below.
- int fd;
- RESTARTABLE(os::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IRUSR|S_IWUSR), fd);
+ int fd = OS_ERR;
+ static const int create_sharedmem_file_retry_count = LINUX_ONLY(3) NOT_LINUX(1);
+ for (int attempt = 0; attempt < create_sharedmem_file_retry_count; attempt++) {
+ // Open the filename in the current directory.
+ // Use O_EXCL so that startup never reuses an existing pid file unless it
+ // has first been proven stale and removed in `cleanup_sharedmem_files`.
+ RESTARTABLE(os::open(filename, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, S_IRUSR|S_IWUSR), fd);
+ if (fd == OS_ERR) {
+ break;
+ }
+
+#if defined(LINUX)
+ // On Linux, different containerized processes that share the same /tmp
+ // directory (e.g., with "docker --volume ...") may have the same pid and
+ // try to use the same file. To avoid conflicts among such processes, we
+ // allow only one of them (the winner of the flock() call) to write to the
+ // file. If we lose the race, assume we may have collided with a concurrent
+ // scavenger briefly holding the lock on a fresh file and retry a few times
+ // before giving up.
+ int n;
+ RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
+ if (n == 0) {
+ break;
+ }
+
+ const int flock_errno = errno;
+ ::close(fd);
+ fd = OS_ERR;
+
+ if (attempt + 1 == create_sharedmem_file_retry_count || flock_errno != EWOULDBLOCK) {
+ log_warning(perf, memops)("Cannot use file %s/%s because %s (errno = %d)", dirname, filename,
+ (flock_errno == EWOULDBLOCK) ?
+ "it is locked by another process" :
+ "flock() failed", flock_errno);
+ errno = flock_errno;
+ break;
+ }
+
+ // Short sleep to allow the lock to free up.
+ os::naked_short_sleep(1);
+#endif
+ }
+
if (fd == OS_ERR) {
if (log_is_enabled(Debug, perf)) {
LogStreamHandle(Debug, perf) log;
if (errno == ELOOP) {
log.print_cr("file %s is a symlink and is not secure", filename);
+ } else if (errno == EEXIST) {
+ log.print_cr("could not create file %s: existing file is not provably stale", filename);
} else {
log.print_cr("could not create file %s: %s", filename, os::strerror(errno));
}
@@ -899,27 +979,7 @@ static int create_sharedmem_file(const char* dirname, const char* filename, size
}
#if defined(LINUX)
- // On Linux, different containerized processes that share the same /tmp
- // directory (e.g., with "docker --volume ...") may have the same pid and
- // try to use the same file. To avoid conflicts among such
- // processes, we allow only one of them (the winner of the flock() call)
- // to write to the file. All the other processes will give up and will
- // have perfdata disabled.
- //
- // Note that the flock will be automatically given up when the winner
- // process exits.
- //
- // The locking protocol works only with other JVMs that have the JDK-8286030
- // fix. If you are sharing the /tmp difrectory among different containers,
- // do not use older JVMs that don't have this fix, or the behavior is undefined.
- int n;
- RESTARTABLE(::flock(fd, LOCK_EX|LOCK_NB), n);
- if (n != 0) {
- log_warning(perf, memops)("Cannot use file %s/%s because %s (errno = %d)", dirname, filename,
- (errno == EWOULDBLOCK) ?
- "it is locked by another process" :
- "flock() failed", errno);
- ::close(fd);
+ if (fd == OS_ERR) {
return -1;
}
#endif
@@ -1082,18 +1142,9 @@ static char* mmap_create_shared(size_t size) {
// release a named shared memory region that was mmap-ed.
//
static void unmap_shared(char* addr, size_t bytes) {
- int res;
- if (MemTracker::enabled()) {
- MemTracker::NmtVirtualMemoryLocker nvml;
- res = ::munmap(addr, bytes);
- if (res == 0) {
- MemTracker::record_virtual_memory_release(addr, bytes);
- }
- } else {
- res = ::munmap(addr, bytes);
- }
- if (res != 0) {
- log_info(os)("os::release_memory failed (" PTR_FORMAT ", %zu)", p2i(addr), bytes);
+ MemTracker::record_virtual_memory_release(addr, bytes);
+ if (::munmap(addr, bytes) != 0) {
+ fatal("os::release_memory failed (" PTR_FORMAT ", %zu)", p2i(addr), bytes);
}
}
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index b0b7ae18106..9d8fb45f0d1 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -2528,12 +2528,6 @@ LONG Handle_Exception(struct _EXCEPTION_POINTERS* exceptionInfo,
return EXCEPTION_CONTINUE_EXECUTION;
}
-
-// Used for PostMortemDump
-extern "C" void safepoints();
-extern "C" void find(int x);
-extern "C" void events();
-
// According to Windows API documentation, an illegal instruction sequence should generate
// the 0xC000001C exception code. However, real world experience shows that occasionnaly
// the execution of an illegal instruction can generate the exception code 0xC000001E. This
@@ -3281,11 +3275,10 @@ static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int fi
// Do manual alignment
aligned_base = align_up(extra_base, alignment);
- bool rc = (file_desc != -1) ? os::unmap_memory(extra_base, extra_size) :
- os::release_memory(extra_base, extra_size);
- assert(rc, "release failed");
- if (!rc) {
- return nullptr;
+ if (file_desc != -1) {
+ os::unmap_memory(extra_base, extra_size);
+ } else {
+ os::release_memory(extra_base, extra_size);
}
// Attempt to map, into the just vacated space, the slightly smaller aligned area.
@@ -3518,11 +3511,6 @@ char* os::pd_reserve_memory_special(size_t bytes, size_t alignment, size_t page_
return reserve_large_pages(bytes, addr, exec);
}
-bool os::pd_release_memory_special(char* base, size_t bytes) {
- assert(base != nullptr, "Sanity check");
- return pd_release_memory(base, bytes);
-}
-
static void warn_fail_commit_memory(char* addr, size_t bytes, bool exec) {
int err = os::get_last_error();
char buf[256];
@@ -3681,8 +3669,8 @@ bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
return os::commit_memory(addr, size, !ExecMem);
}
-bool os::remove_stack_guard_pages(char* addr, size_t size) {
- return os::uncommit_memory(addr, size);
+void os::remove_stack_guard_pages(char* addr, size_t size) {
+ os::uncommit_memory(addr, size);
}
static bool protect_pages_individually(char* addr, size_t bytes, unsigned int p, DWORD *old_status) {
@@ -5120,6 +5108,13 @@ jlong os::seek_to_file_offset(int fd, jlong offset) {
return (jlong)::_lseeki64(fd, (__int64)offset, SEEK_SET);
}
+int64_t os::ftell(FILE* file) {
+ return ::_ftelli64(file);
+}
+
+int os::fseek(FILE* file, int64_t offset, int whence) {
+ return ::_fseeki64(file,offset, whence);
+}
jlong os::lseek(int fd, jlong offset, int whence) {
return (jlong) ::_lseeki64(fd, offset, whence);
@@ -6282,6 +6277,10 @@ const void* os::get_saved_assert_context(const void** sigInfo) {
return nullptr;
}
+void os::print_open_file_descriptors(outputStream* st) {
+ // File descriptor counting not supported on Windows.
+}
+
/*
* Windows/x64 does not use stack frames the way expected by Java:
* [1] in most cases, there is no frame pointer. All locals are addressed via RSP
diff --git a/src/hotspot/os/windows/perfMemory_windows.cpp b/src/hotspot/os/windows/perfMemory_windows.cpp
index f54a2b52cca..dad2804f18a 100644
--- a/src/hotspot/os/windows/perfMemory_windows.cpp
+++ b/src/hotspot/os/windows/perfMemory_windows.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1682,12 +1682,7 @@ void PerfMemory::detach(char* addr, size_t bytes) {
return;
}
- if (MemTracker::enabled()) {
- // it does not go through os api, the operation has to record from here
- MemTracker::NmtVirtualMemoryLocker nvml;
- remove_file_mapping(addr);
- MemTracker::record_virtual_memory_release(addr, bytes);
- } else {
- remove_file_mapping(addr);
- }
+ // it does not go through os api, the operation has to record from here
+ MemTracker::record_virtual_memory_release(addr, bytes);
+ remove_file_mapping(addr);
}
diff --git a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
index afef21b091a..3ab81697280 100644
--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
+++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -412,12 +412,8 @@ run_stub:
}
void os::Aix::init_thread_fpu_state(void) {
-#if !defined(USE_XLC_BUILTINS)
// Disable FP exceptions.
__asm__ __volatile__ ("mtfsfi 6,0");
-#else
- __mtfsfi(6, 0);
-#endif
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/src/hotspot/os_cpu/aix_ppc/prefetch_aix_ppc.inline.hpp b/src/hotspot/os_cpu/aix_ppc/prefetch_aix_ppc.inline.hpp
index c741335b5f0..d9dac0e231f 100644
--- a/src/hotspot/os_cpu/aix_ppc/prefetch_aix_ppc.inline.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/prefetch_aix_ppc.inline.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2013 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,29 +29,21 @@
// Included in runtime/prefetch.inline.hpp
inline void Prefetch::read(const void *loc, intx interval) {
-#if !defined(USE_XLC_BUILTINS)
__asm__ __volatile__ (
" dcbt 0, %0 \n"
:
: /*%0*/"r" ( ((address)loc) +((long)interval) )
//:
);
-#else
- __dcbt(((address)loc) +((long)interval));
-#endif
}
inline void Prefetch::write(void *loc, intx interval) {
-#if !defined(USE_XLC_BUILTINS)
__asm__ __volatile__ (
" dcbtst 0, %0 \n"
:
: /*%0*/"r" ( ((address)loc) +((long)interval) )
//:
);
-#else
- __dcbtst( ((address)loc) +((long)interval) );
-#endif
}
#endif // OS_CPU_AIX_PPC_PREFETCH_AIX_PPC_INLINE_HPP
diff --git a/src/hotspot/share/cds/aotGrowableArray.cpp b/src/hotspot/os_cpu/aix_ppc/vm_version_aix_ppc.cpp
similarity index 78%
rename from src/hotspot/share/cds/aotGrowableArray.cpp
rename to src/hotspot/os_cpu/aix_ppc/vm_version_aix_ppc.cpp
index ec63e7aa57f..8cc8b715201 100644
--- a/src/hotspot/share/cds/aotGrowableArray.cpp
+++ b/src/hotspot/os_cpu/aix_ppc/vm_version_aix_ppc.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,13 +23,14 @@
*
*/
-#include "cds/aotGrowableArray.hpp"
-#include "cds/aotMetaspace.hpp"
-#include "memory/allocation.inline.hpp"
-#include "utilities/growableArray.hpp"
+#include "runtime/vm_version.hpp"
-void AOTGrowableArrayHelper::deallocate(void* mem) {
- if (!AOTMetaspace::in_aot_cache(mem)) {
- GrowableArrayCHeapAllocator::deallocate(mem);
- }
+#include
+
+int VM_Version::get_dcache_line_size() {
+ return _system_configuration.dcache_line;
+}
+
+int VM_Version::get_icache_line_size() {
+ return _system_configuration.icache_line;
}
diff --git a/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp b/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp
index 62dba218b2f..49d879731ff 100644
--- a/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp
+++ b/src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp
@@ -54,8 +54,11 @@
#include "signals_posix.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"
+#include "utilities/decoder.hpp"
#include "utilities/events.hpp"
+#include "utilities/nativeStackPrinter.hpp"
#include "utilities/vmError.hpp"
+#include "compiler/disassembler.hpp"
// put OS-includes here
# include
@@ -85,6 +88,8 @@
#define SPELL_REG_SP "sp"
#ifdef __APPLE__
+WXMode DefaultWXWriteMode;
+
// see darwin-xnu/osfmk/mach/arm/_structs.h
// 10.5 UNIX03 member name prefixes
@@ -233,19 +238,56 @@ NOINLINE frame os::current_frame() {
bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
ucontext_t* uc, JavaThread* thread) {
- // Enable WXWrite: this function is called by the signal handler at arbitrary
- // point of execution.
- ThreadWXEnable wx(WXWrite, thread);
-
// decide if this trap can be handled by a stub
address stub = nullptr;
-
- address pc = nullptr;
+ address pc = nullptr;
//%note os_trap_1
if (info != nullptr && uc != nullptr && thread != nullptr) {
pc = (address) os::Posix::ucontext_get_pc(uc);
+#ifdef MACOS_AARCH64
+ // If we got a SIGBUS because we tried to write into the code
+ // cache, try enabling WXWrite mode.
+ if (sig == SIGBUS
+ && pc != info->si_addr
+ && CodeCache::contains(info->si_addr)
+ && os::address_is_in_vm(pc)) {
+ WXMode *entry_mode = thread->_cur_wx_mode;
+ if (entry_mode != nullptr && *entry_mode == WXArmedForWrite) {
+ if (TraceWXHealing) {
+ static const char *mode_names[3] = {"WXWrite", "WXExec", "WXArmedForWrite"};
+ tty->print("Healing WXMode %s at %p to WXWrite",
+ mode_names[*entry_mode], entry_mode);
+ char name[128];
+ int offset = 0;
+ if (os::dll_address_to_function_name(pc, name, sizeof name, &offset)) {
+ tty->print_cr(" (%s+0x%x)", name, offset);
+ } else {
+ tty->cr();
+ }
+ if (Verbose) {
+ char buf[O_BUFLEN];
+ NativeStackPrinter nsp(thread);
+ nsp.print_stack(tty, buf, sizeof(buf), pc,
+ true /* print_source_info */, -1 /* max stack */);
+ }
+ }
+#ifndef PRODUCT
+ guarantee(StressWXHealing,
+ "We should not reach here unless StressWXHealing");
+#endif
+ *(thread->_cur_wx_mode) = WXWrite;
+ return thread->wx_enable_write();
+ }
+ }
+
+ // There may be cases where code after this point that we call
+ // from the signal handler changes WX state, so we protect against
+ // that by saving and restoring the state.
+ ThreadWXEnable wx(thread->get_wx_state(), thread);
+#endif
+
// Handle ALL stack overflow variations here
if (sig == SIGSEGV || sig == SIGBUS) {
address addr = (address) info->si_addr;
@@ -515,11 +557,42 @@ int os::extra_bang_size_in_bytes() {
return 0;
}
-#ifdef __APPLE__
+#ifdef MACOS_AARCH64
+THREAD_LOCAL bool os::_jit_exec_enabled;
+
+// This is a wrapper around the standard library function
+// pthread_jit_write_protect_np(3). We keep track of the state of
+// per-thread write protection on the MAP_JIT region in the
+// thread-local variable os::_jit_exec_enabled
void os::current_thread_enable_wx(WXMode mode) {
- pthread_jit_write_protect_np(mode == WXExec);
+ bool exec_enabled = mode != WXWrite;
+ if (exec_enabled != _jit_exec_enabled NOT_PRODUCT( || DefaultWXWriteMode == WXWrite)) {
+ permit_forbidden_function::pthread_jit_write_protect_np(exec_enabled);
+ _jit_exec_enabled = exec_enabled;
+ }
}
-#endif
+
+// If the current thread is in the WX state WXArmedForWrite, change
+// the state to WXWrite.
+bool Thread::wx_enable_write() {
+ if (_wx_state == WXArmedForWrite) {
+ _wx_state = WXWrite;
+ os::current_thread_enable_wx(WXWrite);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// A wrapper around wx_enable_write() for when the current thread is
+// not known.
+void os::thread_wx_enable_write_impl() {
+ if (!StressWXHealing) {
+ Thread::current()->wx_enable_write();
+ }
+}
+
+#endif // MACOS_AARCH64
static inline void atomic_copy64(const volatile void *src, volatile void *dst) {
*(jlong *) dst = *(const jlong *) src;
@@ -547,6 +620,8 @@ extern "C" {
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
asm volatile(".inst 0xd50330ff" : : : "memory");
break;
+ case SpinWait::WFET:
+ ShouldNotReachHere();
#ifdef ASSERT
default:
ShouldNotReachHere();
diff --git a/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
index 6c8684718fc..8e45490e5b6 100644
--- a/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomicAccess_bsd_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2011, 2015, Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -27,7 +27,6 @@
#define OS_CPU_BSD_ZERO_ATOMICACCESS_BSD_ZERO_HPP
#include "orderAccess_bsd_zero.hpp"
-#include "runtime/os.hpp"
// Implementation of class AtomicAccess
diff --git a/src/hotspot/os_cpu/linux_aarch64/ic_ivau_probe_linux_aarch64.S b/src/hotspot/os_cpu/linux_aarch64/ic_ivau_probe_linux_aarch64.S
new file mode 100644
index 00000000000..b82053d37b9
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_aarch64/ic_ivau_probe_linux_aarch64.S
@@ -0,0 +1,69 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "defs.S.inc"
+
+ # Probe whether IC IVAU is trapped.
+ #
+ # Returns 1 if IC IVAU is trapped (did not fault), 0 if not trapped
+ # (faulted on VA 0x0, signal handler redirected to continuation).
+ #
+ # int ic_ivau_probe(void);
+DECLARE_FUNC(ic_ivau_probe):
+DECLARE_FUNC(_ic_ivau_probe_fault):
+ ic ivau, xzr
+ mov x0, #1
+ ret
+DECLARE_FUNC(_ic_ivau_probe_continuation):
+ mov x0, #0
+ ret
+
+/* Emit .note.gnu.property section in case of PAC or BTI being enabled. */
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+ #ifdef __ARM_FEATURE_PAC_DEFAULT
+ #define GNU_PROPERTY_AARCH64_FEATURE 3
+ #else
+ #define GNU_PROPERTY_AARCH64_FEATURE 1
+ #endif
+#else
+ #ifdef __ARM_FEATURE_PAC_DEFAULT
+ #define GNU_PROPERTY_AARCH64_FEATURE 2
+ #else
+ #define GNU_PROPERTY_AARCH64_FEATURE 0
+ #endif
+#endif
+
+#if (GNU_PROPERTY_AARCH64_FEATURE != 0)
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 4 /* name length */
+ .long 0x10 /* data length */
+ .long 5 /* note type: NT_GNU_PROPERTY_TYPE_0 */
+ .string "GNU" /* vendor name */
+ .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+ .long 4 /* pr_datasze */
+ .long GNU_PROPERTY_AARCH64_FEATURE
+ .long 0
+ .popsection
+#endif
diff --git a/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.cpp
new file mode 100644
index 00000000000..11911a48e06
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/icache.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+DEBUG_ONLY(THREAD_LOCAL AArch64ICacheInvalidationContext* AArch64ICacheInvalidationContext::_current_context = nullptr;)
diff --git a/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
index 8fbaa7a6b6e..5121a875701 100644
--- a/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/icache_linux_aarch64.hpp
@@ -26,6 +26,11 @@
#ifndef OS_CPU_LINUX_AARCH64_ICACHE_AARCH64_HPP
#define OS_CPU_LINUX_AARCH64_ICACHE_AARCH64_HPP
+#include "memory/allocation.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "vm_version_aarch64.hpp"
+
// Interface for updating the instruction cache. Whenever the VM
// modifies code, part of the processor instruction cache potentially
// has to be flushed.
@@ -37,8 +42,105 @@ class ICache : public AbstractICache {
__builtin___clear_cache((char *)addr, (char *)(addr + 4));
}
static void invalidate_range(address start, int nbytes) {
- __builtin___clear_cache((char *)start, (char *)(start + nbytes));
+ if (NeoverseN1ICacheErratumMitigation) {
+ assert(VM_Version::is_cache_idc_enabled(),
+ "Expect CTR_EL0.IDC to be enabled for Neoverse N1 with erratum "
+ "1542419");
+ assert(!VM_Version::is_cache_dic_enabled(),
+ "Expect CTR_EL0.DIC to be disabled for Neoverse N1 with erratum "
+ "1542419");
+ assert(VM_Version::is_ic_ivau_trapped(), "Expect 'ic ivau, xzr' to be trapped");
+ asm volatile("dsb ish \n"
+ "ic ivau, xzr \n"
+ "dsb ish \n"
+ "isb \n"
+ : : : "memory");
+ } else {
+ __builtin___clear_cache((char *)start, (char *)(start + nbytes));
+ }
}
};
+class AArch64ICacheInvalidationContext : StackObj {
+ private:
+
+#ifdef ASSERT
+ static THREAD_LOCAL AArch64ICacheInvalidationContext* _current_context;
+#endif
+
+ bool _has_modified_code;
+
+ public:
+ NONCOPYABLE(AArch64ICacheInvalidationContext);
+
+ AArch64ICacheInvalidationContext()
+ : _has_modified_code(false) {
+ assert(_current_context == nullptr, "nested ICacheInvalidationContext not supported");
+#ifdef ASSERT
+ _current_context = this;
+#endif
+ }
+
+ ~AArch64ICacheInvalidationContext() {
+ DEBUG_ONLY(_current_context = nullptr);
+
+ if (!_has_modified_code || !UseSingleICacheInvalidation) {
+ return;
+ }
+
+ assert(VM_Version::is_cache_idc_enabled(), "Expect CTR_EL0.IDC to be enabled");
+
+ asm volatile("dsb ish" : : : "memory");
+
+ if (NeoverseN1ICacheErratumMitigation) {
+ assert(!VM_Version::is_cache_dic_enabled(),
+ "Expect CTR_EL0.DIC to be disabled for Neoverse N1 with erratum "
+ "1542419");
+ assert(VM_Version::is_ic_ivau_trapped(), "Expect 'ic ivau, xzr' to be trapped");
+
+ // Errata 1542419: Neoverse N1 cores with the 'COHERENT_ICACHE' feature
+ // may fetch stale instructions when software depends on
+ // prefetch-speculation-protection instead of explicit synchronization.
+ //
+ // Neoverse-N1 implementation mitigates the errata 1542419 with a
+ // workaround:
+ // - Disable coherent icache.
+ // - Trap IC IVAU instructions.
+ // - Execute:
+ // - tlbi vae3is, xzr
+ // - dsb sy
+ // - Ignore trapped IC IVAU instructions.
+ //
+ // `tlbi vae3is, xzr` invalidates all translation entries (all VAs, all
+ // possible levels). It waits for all memory accesses using in-scope old
+ // translation information to complete before it is considered complete.
+ //
+ // As this workaround has significant overhead, Arm Neoverse N1 (MP050)
+ // Software Developer Errata Notice version 29.0 suggests:
+ //
+ // "Since one TLB inner-shareable invalidation is enough to avoid this
+ // erratum, the number of injected TLB invalidations should be minimized
+ // in the trap handler to mitigate the performance impact due to this
+ // workaround."
+ // As the address for icache invalidation is not relevant and
+ // IC IVAU instruction is ignored, we use XZR in it.
+ asm volatile(
+ "ic ivau, xzr \n"
+ "dsb ish \n"
+ :
+ :
+ : "memory");
+ } else {
+ assert(VM_Version::is_cache_dic_enabled(), "Expect CTR_EL0.DIC to be enabled");
+ }
+ asm volatile("isb" : : : "memory");
+ }
+
+ void set_has_modified_code() {
+ _has_modified_code = true;
+ }
+};
+
+#define PD_ICACHE_INVALIDATION_CONTEXT AArch64ICacheInvalidationContext
+
#endif // OS_CPU_LINUX_AARCH64_ICACHE_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
index da9e7e159f1..67e0569bf31 100644
--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
+++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
@@ -77,6 +77,11 @@
#define REG_LR 30
#define REG_BCP 22
+// IC IVAU trap probe.
+// Defined in ic_ivau_probe_linux_aarch64.S.
+extern "C" char _ic_ivau_probe_fault[] __attribute__ ((visibility ("hidden")));
+extern "C" char _ic_ivau_probe_continuation[] __attribute__ ((visibility ("hidden")));
+
NOINLINE address os::current_stack_pointer() {
return (address)__builtin_frame_address(0);
}
@@ -228,6 +233,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
}
}
+ // IC IVAU trap probe during VM_Version initialization.
+ // If IC IVAU is not trapped, it faults on unmapped VA 0x0.
+ if (sig == SIGSEGV && pc == (address)_ic_ivau_probe_fault) {
+ stub = (address)_ic_ivau_probe_continuation;
+ }
+
if (thread->thread_state() == _thread_in_Java) {
// Java thread running in Java code => find exception handler if any
// a fault inside compiled code, the interpreter, or a stub
diff --git a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
index 1fe06dc640d..ee2d3013c4c 100644
--- a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
+++ b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
@@ -31,6 +31,10 @@
#include <asm/hwcap.h>
#include <sys/auxv.h>
+// IC IVAU trap probe.
+// Defined in ic_ivau_probe_linux_aarch64.S.
+extern "C" int ic_ivau_probe(void);
+
#ifndef HWCAP_AES
#define HWCAP_AES (1<<3)
#endif
@@ -95,6 +99,13 @@
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
+#ifndef HWCAP2_ECV
+#define HWCAP2_ECV (1 << 19)
+#endif
+
+#ifndef HWCAP2_WFXT
+#define HWCAP2_WFXT (1u << 31)
+#endif
#ifndef PR_SVE_GET_VL
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_SET_VL 50
@@ -158,6 +169,12 @@ void VM_Version::get_os_cpu_info() {
if (auxv2 & HWCAP2_SVEBITPERM) {
set_feature(CPU_SVEBITPERM);
}
+ if (auxv2 & HWCAP2_ECV) {
+ set_feature(CPU_ECV);
+ }
+ if (auxv2 & HWCAP2_WFXT) {
+ set_feature(CPU_WFXT);
+ }
uint64_t ctr_el0;
uint64_t dczid_el0;
@@ -169,6 +186,12 @@ void VM_Version::get_os_cpu_info() {
_icache_line_size = (1 << (ctr_el0 & 0x0f)) * 4;
_dcache_line_size = (1 << ((ctr_el0 >> 16) & 0x0f)) * 4;
+ _cache_idc_enabled = ((ctr_el0 >> 28) & 0x1) != 0;
+ _cache_dic_enabled = ((ctr_el0 >> 29) & 0x1) != 0;
+
+ // Probe whether IC IVAU is trapped.
+ // Must run before VM_Version::initialize() sets NeoverseN1ICacheErratumMitigation.
+ _ic_ivau_trapped = (ic_ivau_probe() == 1);
if (!(dczid_el0 & 0x10)) {
_zva_length = 4 << (dczid_el0 & 0xf);
diff --git a/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
index 390207f9e5e..c03f5ed1c8b 100644
--- a/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomicAccess_linux_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,8 +26,6 @@
#define OS_CPU_LINUX_ARM_ATOMICACCESS_LINUX_ARM_HPP
#include "memory/allStatic.hpp"
-#include "runtime/os.hpp"
-#include "runtime/vm_version.hpp"
// Implementation of class AtomicAccess
diff --git a/src/hotspot/os_cpu/linux_arm/javaThread_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/javaThread_linux_arm.cpp
index 3dc0035ed87..2b96e978980 100644
--- a/src/hotspot/os_cpu/linux_arm/javaThread_linux_arm.cpp
+++ b/src/hotspot/os_cpu/linux_arm/javaThread_linux_arm.cpp
@@ -42,8 +42,19 @@ frame JavaThread::pd_last_frame() {
void JavaThread::cache_global_variables() {
BarrierSet* bs = BarrierSet::barrier_set();
+#if INCLUDE_G1GC
+ if (bs->is_a(BarrierSet::G1BarrierSet)) {
+ _card_table_base = nullptr;
+ } else
+#endif
+#if INCLUDE_SHENANDOAHGC
+ if (bs->is_a(BarrierSet::ShenandoahBarrierSet)) {
+ _card_table_base = nullptr;
+ } else
+#endif
if (bs->is_a(BarrierSet::CardTableBarrierSet)) {
- _card_table_base = (address) (barrier_set_cast<CardTableBarrierSet>(bs)->card_table()->byte_map_base());
+ CardTableBarrierSet* ctbs = CardTableBarrierSet::barrier_set();
+ _card_table_base = (address)ctbs->card_table_base_const();
} else {
_card_table_base = nullptr;
}
diff --git a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
index 3bb357704fb..49c6942b8e0 100644
--- a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,6 @@
// Included in orderAccess.hpp header file.
-#include "runtime/os.hpp"
#include "runtime/vm_version.hpp"
// Implementation of class OrderAccess.
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ReturnTypeEntry.java b/src/hotspot/os_cpu/linux_ppc/vm_version_linux_ppc.cpp
similarity index 52%
rename from src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ReturnTypeEntry.java
rename to src/hotspot/os_cpu/linux_ppc/vm_version_linux_ppc.cpp
index 667e4cd4a94..d64340edf5c 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ReturnTypeEntry.java
+++ b/src/hotspot/os_cpu/linux_ppc/vm_version_linux_ppc.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,39 +23,22 @@
*
*/
-package sun.jvm.hotspot.oops;
+#include "runtime/vm_version.hpp"
-import java.io.*;
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-import sun.jvm.hotspot.utilities.*;
+#include <unistd.h>
-// Type entry used for return from a call. A single cell to record the
-// type.
-public class ReturnTypeEntry extends TypeEntries {
- static final int cellCount = 1;
-
- ReturnTypeEntry(MethodDataInterface<K,M> methodData, ProfileData pd, int baseOff) {
- super(methodData, pd, baseOff);
- }
-
- K type() {
- return validKlass(baseOff);
- }
-
- static int staticCellCount() {
- return cellCount;
- }
-
- int typeIndex() {
- return baseOff;
- }
-
- void printDataOn(PrintStream st) {
- pd.tab(st);
- printKlass(st, baseOff);
- st.println();
- }
+int VM_Version::get_dcache_line_size() {
+ // This should work on all modern linux versions:
+ int size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+ // It may fail with very old linux / glibc versions. We use DEFAULT_CACHE_LINE_SIZE in this case.
+ // That is the correct value for all currently supported processors.
+ return (size <= 0) ? DEFAULT_CACHE_LINE_SIZE : size;
+}
+
+int VM_Version::get_icache_line_size() {
+ // This should work on all modern linux versions:
+ int size = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+ // It may fail with very old linux / glibc versions. We use DEFAULT_CACHE_LINE_SIZE in this case.
+ // That is the correct value for all currently supported processors.
+ return (size <= 0) ? DEFAULT_CACHE_LINE_SIZE : size;
}
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
index 35cbb75e8ff..648131b94a3 100644
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -36,40 +36,42 @@
#include <asm/hwcap.h>
#include <sys/auxv.h>