mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-13 09:23:20 +00:00
Merge branch 'master' into save-stubgen-stubs
This commit is contained in:
commit
ace5d18b86
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -68,17 +68,19 @@ java.compiler.interim_EXTRA_FILES := \
|
||||
TARGETS += $(BUILDTOOLS_OUTPUTDIR)/gensrc/java.compiler.interim/javax/tools/ToolProvider.java
|
||||
|
||||
################################################################################
|
||||
# Use the up-to-date PreviewFeature.java and NoPreview.java from the current
|
||||
# sources, instead of the versions from the boot JDK, as javac may be referring
|
||||
# to constants from the up-to-date versions.
|
||||
# Create a hybrid PreviewFeature.java that combines constants
|
||||
# from the current sources, as those can be used in javac APIs, and from the
|
||||
# bootstrap JDK, as those can be used from bootstrap JDK classfiles.
|
||||
|
||||
$(eval $(call SetupCopyFiles, COPY_PREVIEW_FEATURES, \
|
||||
FILES := $(TOPDIR)/src/java.base/share/classes/jdk/internal/javac/PreviewFeature.java \
|
||||
$(TOPDIR)/src/java.base/share/classes/jdk/internal/javac/NoPreview.java, \
|
||||
DEST := $(BUILDTOOLS_OUTPUTDIR)/gensrc/java.base.interim/jdk/internal/javac/, \
|
||||
))
|
||||
$(BUILDTOOLS_OUTPUTDIR)/gensrc/java.base.interim/jdk/internal/javac/PreviewFeature.java: \
|
||||
$(TOPDIR)/src/java.base/share/classes/jdk/internal/javac/PreviewFeature.java
|
||||
$(call LogInfo, Generating $@)
|
||||
$(JAVA) $(TOPDIR)/make/langtools/tools/previewfeature/SetupPreviewFeature.java \
|
||||
$(TOPDIR)/src/java.base/share/classes/jdk/internal/javac/PreviewFeature.java \
|
||||
$@
|
||||
|
||||
TARGETS += $(COPY_PREVIEW_FEATURES)
|
||||
|
||||
TARGETS += $(BUILDTOOLS_OUTPUTDIR)/gensrc/java.base.interim/jdk/internal/javac/PreviewFeature.java
|
||||
|
||||
################################################################################
|
||||
# Setup the rules to build interim langtools, which is compiled by the boot
|
||||
@ -123,7 +125,8 @@ define SetupInterimModule
|
||||
$1_DEPS_INTERIM := $$(addsuffix .interim, $$(filter \
|
||||
$$(INTERIM_LANGTOOLS_BASE_MODULES), $$(call FindTransitiveDepsForModule, $1)))
|
||||
|
||||
$$(BUILD_$1.interim): $$(foreach d, $$($1_DEPS_INTERIM), $$(BUILD_$$d)) $(COPY_PREVIEW_FEATURES)
|
||||
$$(BUILD_$1.interim): $$(foreach d, $$($1_DEPS_INTERIM), $$(BUILD_$$d)) \
|
||||
$(BUILDTOOLS_OUTPUTDIR)/gensrc/java.base.interim/jdk/internal/javac/PreviewFeature.java
|
||||
|
||||
TARGETS += $$(BUILD_$1.interim)
|
||||
endef
|
||||
|
||||
@ -70,12 +70,15 @@ CLASSLIST_FILE_VM_OPTS = \
|
||||
|
||||
# Save the stderr output of the command and print it along with stdout in case
|
||||
# something goes wrong.
|
||||
# The classlists must be generated with -Xint to avoid non-determinism
|
||||
# introduced by JIT compiled code
|
||||
$(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST_JAR)
|
||||
$(call MakeDir, $(LINK_OPT_DIR))
|
||||
$(call LogInfo, Generating $(patsubst $(OUTPUTDIR)/%, %, $@))
|
||||
$(call LogInfo, Generating $(patsubst $(OUTPUTDIR)/%, %, $(JLI_TRACE_FILE)))
|
||||
$(FIXPATH) $(INTERIM_IMAGE_DIR)/bin/java -XX:DumpLoadedClassList=$@.raw \
|
||||
$(CLASSLIST_FILE_VM_OPTS) \
|
||||
-Xint \
|
||||
-Xlog:aot=off \
|
||||
-Xlog:cds=off \
|
||||
-cp $(SUPPORT_OUTPUTDIR)/classlist.jar \
|
||||
@ -90,6 +93,7 @@ $(CLASSLIST_FILE): $(INTERIM_IMAGE_DIR)/bin/java$(EXECUTABLE_SUFFIX) $(CLASSLIST
|
||||
-XX:SharedClassListFile=$@.interim -XX:SharedArchiveFile=$@.jsa \
|
||||
-Djava.lang.invoke.MethodHandle.TRACE_RESOLVE=true \
|
||||
$(CLASSLIST_FILE_VM_OPTS) \
|
||||
-Xint \
|
||||
--module-path $(SUPPORT_OUTPUTDIR)/classlist.jar \
|
||||
-Xlog:aot=off \
|
||||
-Xlog:cds=off \
|
||||
|
||||
@ -578,6 +578,11 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
|
||||
TOOLCHAIN_CFLAGS_JDK_CONLY="-fno-strict-aliasing" # technically NOT for CXX
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_LINKTIME_GC" = xtrue; then
|
||||
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -ffunction-sections -fdata-sections"
|
||||
TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -ffunction-sections -fdata-sections"
|
||||
fi
|
||||
|
||||
if test "x$OPENJDK_TARGET_OS" = xaix; then
|
||||
TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM -ffunction-sections -ftls-model -fno-math-errno"
|
||||
TOOLCHAIN_CFLAGS_JDK="-ffunction-sections -fsigned-char"
|
||||
|
||||
@ -80,6 +80,10 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
|
||||
if test "x$CXX_IS_USER_SUPPLIED" = xfalse && test "x$CC_IS_USER_SUPPLIED" = xfalse; then
|
||||
UTIL_REQUIRE_TOOLCHAIN_PROGS(LLD, lld)
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_LINKTIME_GC" = xtrue; then
|
||||
BASIC_LDFLAGS_JDK_ONLY="$BASIC_LDFLAGS_JDK_ONLY -Wl,--gc-sections"
|
||||
fi
|
||||
fi
|
||||
if test "x$OPENJDK_TARGET_OS" = xaix; then
|
||||
BASIC_LDFLAGS="-Wl,-b64 -Wl,-brtl -Wl,-bnorwexec -Wl,-blibpath:/usr/lib:lib -Wl,-bnoexpall \
|
||||
|
||||
@ -103,8 +103,12 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_JDK_OPTIONS],
|
||||
AC_SUBST(ENABLE_HEADLESS_ONLY)
|
||||
|
||||
# should we linktime gc unused code sections in the JDK build ?
|
||||
if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$OPENJDK_TARGET_CPU" = xs390x; then
|
||||
LINKTIME_GC_DEFAULT=true
|
||||
if test "x$OPENJDK_TARGET_OS" = "xlinux"; then
|
||||
if test "x$OPENJDK_TARGET_CPU" = "xs390x" || test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then
|
||||
LINKTIME_GC_DEFAULT=true
|
||||
else
|
||||
LINKTIME_GC_DEFAULT=false
|
||||
fi
|
||||
else
|
||||
LINKTIME_GC_DEFAULT=false
|
||||
fi
|
||||
|
||||
@ -63,6 +63,10 @@ $(eval $(call SetupJdkLibrary, BUILD_GTEST_LIBGTEST, \
|
||||
unused-result zero-as-null-pointer-constant, \
|
||||
DISABLED_WARNINGS_clang := format-nonliteral undef unused-result \
|
||||
zero-as-null-pointer-constant, \
|
||||
$(comment Disable deprecated-declarations warnings to workaround) \
|
||||
$(comment clang18+glibc12 bug https://github.com/llvm/llvm-project/issues/76515) \
|
||||
$(comment until the clang bug has been fixed) \
|
||||
DISABLED_WARNINGS_clang_gtest-all.cc := deprecated-declarations, \
|
||||
DISABLED_WARNINGS_microsoft := 4530, \
|
||||
DEFAULT_CFLAGS := false, \
|
||||
CFLAGS := $(JVM_CFLAGS) \
|
||||
|
||||
93
make/langtools/tools/previewfeature/SetupPreviewFeature.java
Normal file
93
make/langtools/tools/previewfeature/SetupPreviewFeature.java
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package previewfeature;
|
||||
|
||||
import com.sun.source.util.JavacTask;
|
||||
import com.sun.source.util.Trees;
|
||||
import java.io.StringWriter;
|
||||
import java.lang.reflect.Field;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import javax.lang.model.element.ElementKind;
|
||||
import javax.tools.ToolProvider;
|
||||
|
||||
/* Construct a hybrid PreviewFeature.Feature enum that includes constants both
|
||||
* from the current JDK sources (so that they can be used in the javac API sources),
|
||||
* and from the bootstrap JDK (so that they can be used in the bootstrap classfiles).
|
||||
*
|
||||
* This hybrid enum is only used for the interim javac.
|
||||
*/
|
||||
public class SetupPreviewFeature {

    /** Binary name of the Feature enum as loaded from the JDK that runs this tool. */
    private static final String RUNTIME_FEATURE_CLASS =
            "jdk.internal.javac.PreviewFeature$Feature";

    /** Canonical name of the Feature enum as declared in the analyzed source file. */
    private static final String SOURCE_FEATURE_TYPE =
            "jdk.internal.javac.PreviewFeature.Feature";

    /**
     * Writes a copy of the given PreviewFeature.java in which the Feature enum
     * additionally declares every enum constant that exists in the Feature enum
     * of the running JDK but is missing from the given source.
     *
     * @param args {@code args[0]} is the path of the PreviewFeature.java source
     *             to read, {@code args[1]} is the path of the file to write;
     *             any further arguments are ignored
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IllegalStateException if no system Java compiler is available,
     *             if the Feature enum cannot be found in the source, or if
     *             constants must be added but the source enum declares none
     *             (so there is no position to insert them at)
     * @throws Exception on class loading or I/O failures
     */
    public static void main(String... args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: SetupPreviewFeature <source PreviewFeature.java> <target file>");
        }
        var source = Path.of(args[0]);
        var target = Path.of(args[1]);

        // Start with every constant known to the running JDK; the ones that
        // the source declares as well are removed again below.
        Class<?> runtimeFeature = Class.forName(RUNTIME_FEATURE_CLASS);
        Set<String> constantsToAdd = new HashSet<>();
        for (Field runtimeField : runtimeFeature.getDeclaredFields()) {
            if (runtimeField.isEnumConstant()) {
                constantsToAdd.add(runtimeField.getName());
            }
        }

        var compiler = ToolProvider.getSystemJavaCompiler();
        if (compiler == null) {
            throw new IllegalStateException("No system Java compiler is available");
        }

        // Compiler output is collected here and intentionally not reported.
        var discardedOutput = new StringWriter();
        try (var fm = compiler.getStandardFileManager(null, null, null)) {
            JavacTask task =
                    (JavacTask) compiler.getTask(discardedOutput, null, null, null, null,
                                                 fm.getJavaFileObjects(source));
            task.analyze();

            var sourceFeature = task.getElements().getTypeElement(SOURCE_FEATURE_TYPE);
            if (sourceFeature == null) {
                throw new IllegalStateException(
                        "Cannot find " + SOURCE_FEATURE_TYPE + " in " + source);
            }

            // Offset of the first enum constant in the source text; the
            // compatibility constants are inserted immediately before it.
            int insertPosition = -1;
            for (var el : sourceFeature.getEnclosedElements()) {
                if (el.getKind() != ElementKind.ENUM_CONSTANT) {
                    continue;
                }
                constantsToAdd.remove(el.getSimpleName().toString());
                if (insertPosition == -1) {
                    var trees = Trees.instance(task);
                    var elPath = trees.getPath(el);
                    insertPosition = (int) trees.getSourcePositions()
                                                .getStartPosition(elPath.getCompilationUnit(),
                                                                  elPath.getLeaf());
                }
            }

            // A bare file name has no parent; nothing needs to be created then.
            var targetDir = target.getParent();
            if (targetDir != null) {
                Files.createDirectories(targetDir);
            }

            if (constantsToAdd.isEmpty()) {
                // The target exists already whenever the build re-runs this
                // step, so it must be overwritten, exactly like the writer
                // in the other branch does.
                Files.copy(source, target,
                           java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            } else {
                if (insertPosition < 0) {
                    // Fail before the target is created/truncated.
                    throw new IllegalStateException(
                            "No enum constant found in " + SOURCE_FEATURE_TYPE
                            + "; cannot determine where to insert compatibility constants");
                }
                String sourceCode = Files.readString(source);
                // Sorted so that the generated file does not depend on hash order.
                String compatibilityConstants =
                        constantsToAdd.stream()
                                      .sorted()
                                      .collect(Collectors.joining(", ",
                                                                  "/*compatibility constants:*/ ",
                                                                  ",\n"));
                try (var out = Files.newBufferedWriter(target)) {
                    out.write(sourceCode, 0, insertPosition);
                    out.write(compatibilityConstants);
                    out.write(sourceCode, insertPosition, sourceCode.length() - insertPosition);
                }
            }
        }
    }
}
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -423,6 +423,9 @@ endif
|
||||
ifeq ($(call isTargetOs, linux)+$(ENABLE_HEADLESS_ONLY), true+true)
|
||||
LIBJAWT_CFLAGS += -DHEADLESS
|
||||
endif
|
||||
ifeq ($(call isTargetOs, aix)+$(ENABLE_HEADLESS_ONLY), true+true)
|
||||
LIBJAWT_CFLAGS += -DHEADLESS
|
||||
endif
|
||||
|
||||
ifeq ($(call isTargetOs, windows)+$(call isTargetCpu, x86), true+true)
|
||||
LIBJAWT_LIBS_windows := kernel32.lib
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
|
||||
// Copyright 2025 Arm Limited and/or its affiliates.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
@ -2467,11 +2467,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
|
||||
return opnd->opcode() == VREG;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
// This function is used on startup to build the trampoline stubs in
|
||||
// generateOptoStub. Registers not mentioned will be killed by the VM
|
||||
// call in the trampoline, and arguments in those registers not be
|
||||
// available to the callee.
|
||||
bool Matcher::can_be_java_arg(int reg)
|
||||
{
|
||||
return
|
||||
@ -2492,11 +2489,7 @@ bool Matcher::can_be_java_arg(int reg)
|
||||
reg == V6_num || reg == V6_H_num ||
|
||||
reg == V7_num || reg == V7_H_num;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg(int reg)
|
||||
{
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
@ -3814,11 +3807,6 @@ frame %{
|
||||
// Compiled code's Frame Pointer
|
||||
frame_pointer(R31);
|
||||
|
||||
// Interpreter stores its frame pointer in a register which is
|
||||
// stored to the stack by I2CAdaptors.
|
||||
// I2CAdaptors convert from interpreted java to compiled java.
|
||||
interpreter_frame_pointer(R29);
|
||||
|
||||
// Stack alignment requirement
|
||||
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
|
||||
|
||||
|
||||
@ -3814,8 +3814,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
|
||||
bool isMerge, bool isFloat) {
|
||||
void _sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
|
||||
bool isMerge, bool isFloat) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
int sh = 0;
|
||||
@ -3839,11 +3839,11 @@ private:
|
||||
public:
|
||||
// SVE copy signed integer immediate to vector elements (predicated)
|
||||
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
|
||||
sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
|
||||
_sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
|
||||
}
|
||||
// SVE copy floating-point immediate to vector elements (predicated)
|
||||
void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, double d) {
|
||||
sve_cpy(Zd, T, Pg, checked_cast<uint8_t>(pack(d)), /*isMerge*/true, /*isFloat*/true);
|
||||
_sve_cpy(Zd, T, Pg, checked_cast<uint8_t>(pack(d)), /*isMerge*/true, /*isFloat*/true);
|
||||
}
|
||||
|
||||
// SVE conditionally select elements from two vectors
|
||||
|
||||
@ -42,7 +42,6 @@ define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 1500 );
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 933 );
|
||||
define_pd_global(intx, NewSizeThreadIncrease, 4*K );
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
|
||||
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
|
||||
|
||||
@ -2875,3 +2875,24 @@ void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src,
|
||||
// dst = 00 87 00 65 00 43 00 21
|
||||
sve_tbl(dst, size, src, dst);
|
||||
}
|
||||
|
||||
// Optimized SVE cpy (imm, zeroing) instruction.
|
||||
//
|
||||
// `movi; cpy(imm, merging)` and `cpy(imm, zeroing)` have the same
|
||||
// functionality, but test results show that `movi; cpy(imm, merging)` has
|
||||
// higher throughput on some microarchitectures. This would depend on
|
||||
// microarchitecture and so may vary between implementations.
|
||||
void C2_MacroAssembler::sve_cpy(FloatRegister dst, SIMD_RegVariant T,
                                PRegister pg, int imm8, bool isMerge) {
  // A zeroing-mode request is rewritten as "clear dst, then merging-mode cpy"
  // when VM_Version reports that the merging form is preferred.
  const bool zero_then_merge = !isMerge && VM_Version::prefer_sve_merging_mode_cpy();

  if (zero_then_merge) {
    // Emit the NEON instruction `movi V<dst>.2d, #0`.
    // V<dst> aliases the low 128 bits of Z<dst> on AArch64, and writing
    // V<dst> clears every Z<dst> bit above 128 as well, so the whole
    // Z<dst> register ends up zero. The Arm Software Optimization Guide
    // lists `movi` as zero latency.
    movi(dst, T2D, 0);
  }

  // With dst already cleared, merging mode yields the same result as zeroing.
  Assembler::sve_cpy(dst, T, pg, imm8, isMerge || zero_then_merge);
}
|
||||
|
||||
@ -75,6 +75,8 @@
|
||||
unsigned vector_length_in_bytes);
|
||||
|
||||
public:
|
||||
using Assembler::sve_cpy;
|
||||
|
||||
// jdk.internal.util.ArraysSupport.vectorizedHashCode
|
||||
address arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0,
|
||||
FloatRegister vdata1, FloatRegister vdata2, FloatRegister vdata3,
|
||||
@ -244,4 +246,7 @@
|
||||
void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
|
||||
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
|
||||
int vector_length_in_bytes);
|
||||
|
||||
void sve_cpy(FloatRegister dst, SIMD_RegVariant T, PRegister pg, int imm8,
|
||||
bool isMerge);
|
||||
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
|
||||
@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
|
||||
define_pd_global(intx, FreqInlineSize, 325);
|
||||
define_pd_global(intx, MinJumpTableSize, 10);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16);
|
||||
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
define_pd_global(intx, LoopPercentProfileLimit, 10);
|
||||
// InitialCodeCacheSize derived from specjbb2000 run.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2019, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -146,10 +146,10 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
bool should_save_return_value = !_needs_return_buffer;
|
||||
RegSpiller out_reg_spiller(_output_registers);
|
||||
int spill_offset = -1;
|
||||
int out_spill_offset = -1;
|
||||
|
||||
if (should_save_return_value) {
|
||||
spill_offset = 0;
|
||||
out_spill_offset = 0;
|
||||
// spill area can be shared with shadow space and out args,
|
||||
// since they are only used before the call,
|
||||
// and spill area is only used after.
|
||||
@ -174,6 +174,9 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
// FP-> | |
|
||||
// |---------------------| = frame_bottom_offset = frame_size
|
||||
// | (optional) |
|
||||
// | in_reg_spiller area |
|
||||
// |---------------------|
|
||||
// | (optional) |
|
||||
// | capture state buf |
|
||||
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
|
||||
// | (optional) |
|
||||
@ -187,6 +190,19 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
GrowableArray<VMStorage> out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
|
||||
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
|
||||
|
||||
// Need to spill for state capturing runtime call.
|
||||
// The area spilled into is distinct from the capture state buffer.
|
||||
RegSpiller in_reg_spiller(out_regs);
|
||||
int in_spill_offset = -1;
|
||||
if (_captured_state_mask != 0) {
|
||||
// The spill area cannot be shared with the out_spill since
|
||||
// spilling needs to happen before the call. Allocate a new
|
||||
// region in the stack for this spill space.
|
||||
in_spill_offset = allocated_frame_size;
|
||||
allocated_frame_size += in_reg_spiller.spill_size_bytes();
|
||||
}
|
||||
|
||||
|
||||
#ifndef PRODUCT
|
||||
LogTarget(Trace, foreign, downcall) lt;
|
||||
if (lt.is_enabled()) {
|
||||
@ -228,6 +244,20 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
|
||||
__ block_comment("} argument shuffle");
|
||||
|
||||
if (_captured_state_mask != 0) {
|
||||
assert(in_spill_offset != -1, "must be");
|
||||
__ block_comment("{ load initial thread local");
|
||||
in_reg_spiller.generate_spill(_masm, in_spill_offset);
|
||||
|
||||
// Copy the contents of the capture state buffer into thread local
|
||||
__ ldr(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ movw(c_rarg1, _captured_state_mask);
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre), tmp1);
|
||||
|
||||
in_reg_spiller.generate_fill(_masm, in_spill_offset);
|
||||
__ block_comment("} load initial thread local");
|
||||
}
|
||||
|
||||
__ blr(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
|
||||
// this call is assumed not to have killed rthread
|
||||
|
||||
@ -254,15 +284,15 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ block_comment("{ save thread local");
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ ldr(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ movw(c_rarg1, _captured_state_mask);
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state), tmp1);
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post), tmp1);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ block_comment("} save thread local");
|
||||
@ -321,7 +351,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
if (should_save_return_value) {
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ mov(c_rarg0, rthread);
|
||||
@ -330,7 +360,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ blr(tmp1);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ b(L_after_safepoint_poll);
|
||||
@ -342,13 +372,13 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_reguard);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), tmp1);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ b(L_after_reguard);
|
||||
|
||||
@ -95,7 +95,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
"Use simplest and shortest implementation for array equals") \
|
||||
product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
|
||||
"Use SIMD instructions for left/right shift of BigInteger") \
|
||||
product(bool, UseSIMDForSHA3Intrinsic, true, \
|
||||
product(bool, UseSIMDForSHA3Intrinsic, false, \
|
||||
"Use SIMD SHA3 instructions for SHA3 intrinsic") \
|
||||
product(bool, AvoidUnalignedAccesses, false, \
|
||||
"Avoid generating unaligned memory accesses") \
|
||||
|
||||
@ -12751,16 +12751,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(StubId::stubgen_sha512_implCompress_id);
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(StubId::stubgen_sha512_implCompressMB_id);
|
||||
}
|
||||
if (UseSHA3Intrinsics) {
|
||||
|
||||
if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) {
|
||||
StubRoutines::_double_keccak = generate_double_keccak();
|
||||
if (UseSIMDForSHA3Intrinsic) {
|
||||
StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubId::stubgen_sha3_implCompress_id);
|
||||
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id);
|
||||
} else {
|
||||
StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompress_id);
|
||||
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompressMB_id);
|
||||
}
|
||||
StubRoutines::_sha3_implCompress = generate_sha3_implCompress(StubId::stubgen_sha3_implCompress_id);
|
||||
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(StubId::stubgen_sha3_implCompressMB_id);
|
||||
} else if (UseSHA3Intrinsics) {
|
||||
StubRoutines::_sha3_implCompress = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompress_id);
|
||||
StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress_gpr(StubId::stubgen_sha3_implCompressMB_id);
|
||||
}
|
||||
|
||||
if (UsePoly1305Intrinsics) {
|
||||
|
||||
@ -365,16 +365,28 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && VM_Version::supports_sha3()) {
|
||||
// Auto-enable UseSHA3Intrinsics on hardware with performance benefit.
|
||||
// Note that the evaluation of UseSHA3Intrinsics shows better performance
|
||||
if (UseSHA) {
|
||||
// No need to check VM_Version::supports_sha3(), since a fallback GPR intrinsic implementation is provided.
|
||||
if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA3Intrinsics) {
|
||||
// Matches the documented and tested behavior: the -UseSHA option disables all SHA intrinsics.
|
||||
warning("UseSHA3Intrinsics requires that UseSHA is enabled.");
|
||||
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA3Intrinsics && VM_Version::supports_sha3()) {
|
||||
// Auto-enable UseSIMDForSHA3Intrinsic on hardware with performance benefit.
|
||||
// Note that the evaluation of SHA3 extension Intrinsics shows better performance
|
||||
// on Apple and Qualcomm silicon but worse performance on Neoverse V1 and N2.
|
||||
if (_cpu == CPU_APPLE || _cpu == CPU_QUALCOMM) { // Apple or Qualcomm silicon
|
||||
if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
|
||||
if (FLAG_IS_DEFAULT(UseSIMDForSHA3Intrinsic)) {
|
||||
FLAG_SET_DEFAULT(UseSIMDForSHA3Intrinsic, true);
|
||||
}
|
||||
}
|
||||
} else if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic) {
|
||||
}
|
||||
if (UseSHA3Intrinsics && UseSIMDForSHA3Intrinsic && !VM_Version::supports_sha3()) {
|
||||
warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
|
||||
}
|
||||
|
||||
@ -55,6 +55,9 @@ protected:
|
||||
static int _max_supported_sve_vector_length;
|
||||
static bool _rop_protection;
|
||||
static uintptr_t _pac_mask;
|
||||
// When _prefer_sve_merging_mode_cpy is true, `cpy (imm, zeroing)` is
|
||||
// implemented as `movi; cpy(imm, merging)`.
|
||||
static constexpr bool _prefer_sve_merging_mode_cpy = true;
|
||||
|
||||
static SpinWait _spin_wait;
|
||||
|
||||
@ -242,6 +245,8 @@ public:
|
||||
|
||||
static bool use_rop_protection() { return _rop_protection; }
|
||||
|
||||
static bool prefer_sve_merging_mode_cpy() { return _prefer_sve_merging_mode_cpy; }
|
||||
|
||||
// For common 64/128-bit unpredicated vector operations, we may prefer
|
||||
// emitting NEON instructions rather than the corresponding SVE instructions.
|
||||
static bool use_neon_for_vector(int vector_length_in_bytes) {
|
||||
|
||||
@ -1088,10 +1088,8 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
|
||||
return clone_base_plus_offset_address(m, mstack, address_visited);
|
||||
}
|
||||
|
||||
// Return whether or not this register is ever used as an argument. This
|
||||
// function is used on startup to build the trampoline stubs in generateOptoStub.
|
||||
// Registers not mentioned will be killed by the VM call in the trampoline, and
|
||||
// arguments in those registers not be available to the callee.
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
bool Matcher::can_be_java_arg( int reg ) {
|
||||
if (reg == R_R0_num ||
|
||||
reg == R_R1_num ||
|
||||
@ -1102,10 +1100,7 @@ bool Matcher::can_be_java_arg( int reg ) {
|
||||
reg <= R_S13_num) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg( int reg ) {
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
|
||||
@ -43,7 +43,6 @@ define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 1500 );
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 933 );
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
|
||||
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
|
||||
|
||||
@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 4);
|
||||
// C2 gets to use all the float/double registers
|
||||
define_pd_global(intx, FreqInlineSize, 175);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
// The default setting 16/16 seems to work best.
|
||||
// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
|
||||
//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
|
||||
|
||||
@ -52,7 +52,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
|
||||
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
|
||||
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
|
||||
define_pd_global(bool, NeverActAsServerClassMachine, true);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
#endif // !COMPILER2
|
||||
|
||||
|
||||
@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
|
||||
define_pd_global(intx, FreqInlineSize, 325);
|
||||
define_pd_global(intx, MinJumpTableSize, 10);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, RegisterCostAreaRatio, 16000);
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
define_pd_global(intx, LoopPercentProfileLimit, 10);
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -135,10 +135,10 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
bool should_save_return_value = !_needs_return_buffer;
|
||||
RegSpiller out_reg_spiller(_output_registers);
|
||||
int spill_offset = -1;
|
||||
int out_spill_offset = -1;
|
||||
|
||||
if (should_save_return_value) {
|
||||
spill_offset = frame::native_abi_reg_args_size;
|
||||
out_spill_offset = frame::native_abi_reg_args_size;
|
||||
// Spill area can be shared with additional out args (>8),
|
||||
// since it is only used after the call.
|
||||
int frame_size_including_spill_area = frame::native_abi_reg_args_size + out_reg_spiller.spill_size_bytes();
|
||||
@ -170,6 +170,18 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, _abi._scratch1);
|
||||
|
||||
// Need to spill for state capturing runtime call.
|
||||
// The area spilled into is distinct from the capture state buffer.
|
||||
RegSpiller in_reg_spiller(out_regs);
|
||||
int in_spill_offset = -1;
|
||||
if (_captured_state_mask != 0) {
|
||||
// The spill area cannot be shared with the out_spill since
|
||||
// spilling needs to happen before the call. Allocate a new
|
||||
// region in the stack for this spill space.
|
||||
in_spill_offset = allocated_frame_size;
|
||||
allocated_frame_size += in_reg_spiller.spill_size_bytes();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
LogTarget(Trace, foreign, downcall) lt;
|
||||
if (lt.is_enabled()) {
|
||||
@ -211,6 +223,21 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
arg_shuffle.generate(_masm, as_VMStorage(callerSP), frame::jit_out_preserve_size, frame::native_abi_minframe_size);
|
||||
__ block_comment("} argument shuffle");
|
||||
|
||||
if (_captured_state_mask != 0) {
|
||||
assert(in_spill_offset != -1, "must be");
|
||||
__ block_comment("{ load initial thread local");
|
||||
in_reg_spiller.generate_spill(_masm, in_spill_offset);
|
||||
|
||||
// Copy the contents of the capture state buffer into thread local
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_pre), R0);
|
||||
__ ld(R3_ARG1, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER), R1_SP);
|
||||
__ load_const_optimized(R4_ARG2, _captured_state_mask, R0);
|
||||
__ call_c(call_target_address);
|
||||
|
||||
in_reg_spiller.generate_fill(_masm, in_spill_offset);
|
||||
__ block_comment("} load initial thread local");
|
||||
}
|
||||
|
||||
__ call_c(call_target_address);
|
||||
|
||||
if (_needs_return_buffer) {
|
||||
@ -247,16 +274,16 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ block_comment("{ save thread local");
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state), R0);
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_post), R0);
|
||||
__ ld(R3_ARG1, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER), R1_SP);
|
||||
__ load_const_optimized(R4_ARG2, _captured_state_mask, R0);
|
||||
__ call_c(call_target_address);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ block_comment("} save thread local");
|
||||
@ -310,7 +337,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
if (should_save_return_value) {
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, JavaThread::check_special_condition_for_native_trans), R0);
|
||||
@ -318,7 +345,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ call_c(call_target_address);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ b(L_after_safepoint_poll);
|
||||
@ -330,14 +357,14 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_reguard);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, SharedRuntime::reguard_yellow_pages), R0);
|
||||
__ call_c(call_target_address);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ b(L_after_reguard);
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2018, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -179,6 +179,11 @@ void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Re
|
||||
__ ld(dst, 0, dst); // Resolve (untagged) jobject.
|
||||
}
|
||||
|
||||
void BarrierSetAssembler::try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
|
||||
// Load the oop from the weak handle.
|
||||
__ ld(obj, 0, obj);
|
||||
}
|
||||
|
||||
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Register tmp) {
|
||||
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
|
||||
assert_different_registers(tmp, R0);
|
||||
@ -275,11 +280,6 @@ OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Na
|
||||
return opto_reg;
|
||||
}
|
||||
|
||||
void BarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
|
||||
// Load the oop from the weak handle.
|
||||
__ ld(obj, 0, obj);
|
||||
}
|
||||
|
||||
#undef __
|
||||
#define __ _masm->
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2022 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2018, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -70,6 +70,12 @@ public:
|
||||
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
|
||||
Register obj, Register tmp, Label& slowpath);
|
||||
|
||||
// Can be used in nmethods including native wrappers.
|
||||
// Attention: obj will only be valid until next safepoint (no SATB barrier).
|
||||
// TODO: maybe rename to try_peek_weak_handle on all platforms (try: operation may fail, peek: obj is not kept alive)
|
||||
// (other platforms currently use it for C2 only: try_resolve_weak_handle_in_c2)
|
||||
virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
|
||||
|
||||
virtual void barrier_stubs_init() {}
|
||||
|
||||
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
|
||||
@ -81,8 +87,6 @@ public:
|
||||
|
||||
#ifdef COMPILER2
|
||||
OptoReg::Name refine_register(const Node* node, OptoReg::Name opto_reg) const;
|
||||
virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj,
|
||||
Register tmp, Label& slow_path);
|
||||
#endif // COMPILER2
|
||||
};
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Red Hat, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -663,17 +663,16 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
|
||||
__ block_comment("} try_resolve_jobject_in_native (shenandoahgc)");
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
void ShenandoahBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler *masm, Register obj,
|
||||
Register tmp, Label &slow_path) {
|
||||
__ block_comment("try_resolve_weak_handle_in_c2 (shenandoahgc) {");
|
||||
void ShenandoahBarrierSetAssembler::try_resolve_weak_handle(MacroAssembler *masm, Register obj,
|
||||
Register tmp, Label &slow_path) {
|
||||
__ block_comment("try_resolve_weak_handle (shenandoahgc) {");
|
||||
|
||||
assert_different_registers(obj, tmp);
|
||||
|
||||
Label done;
|
||||
|
||||
// Resolve weak handle using the standard implementation.
|
||||
BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
|
||||
BarrierSetAssembler::try_resolve_weak_handle(masm, obj, tmp, slow_path);
|
||||
|
||||
// Check if the reference is null, and if it is, take the fast path.
|
||||
__ cmpdi(CR0, obj, 0);
|
||||
@ -686,9 +685,8 @@ void ShenandoahBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler
|
||||
__ bne(CR0, slow_path);
|
||||
__ bind(done);
|
||||
|
||||
__ block_comment("} try_resolve_weak_handle_in_c2 (shenandoahgc)");
|
||||
__ block_comment("} try_resolve_weak_handle (shenandoahgc)");
|
||||
}
|
||||
#endif
|
||||
|
||||
// Special shenandoah CAS implementation that handles false negatives due
|
||||
// to concurrent evacuation. That is, the CAS operation is intended to succeed in
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, 2022 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -122,9 +122,8 @@ public:
|
||||
|
||||
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
|
||||
Register obj, Register tmp, Label& slowpath);
|
||||
#ifdef COMPILER2
|
||||
virtual void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
|
||||
#endif
|
||||
|
||||
virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
|
||||
};
|
||||
|
||||
#endif // CPU_PPC_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_PPC_HPP
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2021, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -627,6 +627,19 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, R
|
||||
__ block_comment("} try_resolve_jobject_in_native (zgc)");
|
||||
}
|
||||
|
||||
void ZBarrierSetAssembler::try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
|
||||
// Resolve weak handle using the standard implementation.
|
||||
BarrierSetAssembler::try_resolve_weak_handle(masm, obj, tmp, slow_path);
|
||||
|
||||
// Check if the oop is bad, in which case we need to take the slow path.
|
||||
__ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadMask);
|
||||
__ andi_(R0, obj, barrier_Relocation::unpatched);
|
||||
__ bne(CR0, slow_path);
|
||||
|
||||
// Oop is okay, so we uncolor it.
|
||||
__ srdi(obj, obj, ZPointerLoadShift);
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#ifdef COMPILER1
|
||||
@ -950,19 +963,6 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
||||
void ZBarrierSetAssembler::try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path) {
|
||||
// Resolve weak handle using the standard implementation.
|
||||
BarrierSetAssembler::try_resolve_weak_handle_in_c2(masm, obj, tmp, slow_path);
|
||||
|
||||
// Check if the oop is bad, in which case we need to take the slow path.
|
||||
__ relocate(barrier_Relocation::spec(), ZBarrierRelocationFormatMarkBadMask);
|
||||
__ andi_(R0, obj, barrier_Relocation::unpatched);
|
||||
__ bne(CR0, slow_path);
|
||||
|
||||
// Oop is okay, so we uncolor it.
|
||||
__ srdi(obj, obj, ZPointerLoadShift);
|
||||
}
|
||||
|
||||
#undef __
|
||||
#endif // COMPILER2
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2022 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2021, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -72,6 +72,8 @@ public:
|
||||
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register dst, Register jni_env,
|
||||
Register obj, Register tmp, Label& slowpath);
|
||||
|
||||
virtual void try_resolve_weak_handle(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
|
||||
|
||||
virtual void check_oop(MacroAssembler *masm, Register obj, const char* msg);
|
||||
|
||||
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_instruction_and_data_patch; }
|
||||
@ -108,8 +110,6 @@ public:
|
||||
void generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const;
|
||||
|
||||
void generate_c2_store_barrier_stub(MacroAssembler* masm, ZStoreBarrierStubC2* stub) const;
|
||||
|
||||
void try_resolve_weak_handle_in_c2(MacroAssembler* masm, Register obj, Register tmp, Label& slow_path);
|
||||
#endif // COMPILER2
|
||||
|
||||
void store_barrier_fast(MacroAssembler* masm,
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -24,6 +24,7 @@
|
||||
*/
|
||||
|
||||
#include "runtime/icache.hpp"
|
||||
#include "runtime/vm_version.hpp"
|
||||
|
||||
// Use inline assembler to implement icache flush.
|
||||
int ICache::ppc64_flush_icache(address start, int lines, int magic) {
|
||||
@ -67,6 +68,9 @@ int ICache::ppc64_flush_icache(address start, int lines, int magic) {
|
||||
|
||||
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
|
||||
|
||||
guarantee(VM_Version::get_icache_line_size() >= ICache::line_size,
|
||||
"processors with smaller cache line size are no longer supported");
|
||||
|
||||
*flush_icache_stub = (ICache::flush_icache_stub_t)ICache::ppc64_flush_icache;
|
||||
|
||||
// First call to flush itself.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2013 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -35,9 +35,8 @@ class ICache : public AbstractICache {
|
||||
|
||||
public:
|
||||
enum {
|
||||
// Actually, cache line size is 64, but keeping it as it is to be
|
||||
// on the safe side on ALL PPC64 implementations.
|
||||
log2_line_size = 5,
|
||||
// Cache line size is 128 on all supported PPC64 implementations.
|
||||
log2_line_size = 7,
|
||||
line_size = 1 << log2_line_size
|
||||
};
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2800,7 +2800,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
|
||||
// Check if object matches.
|
||||
ld(tmp3, in_bytes(ObjectMonitor::object_offset()), monitor);
|
||||
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs_asm->try_resolve_weak_handle_in_c2(this, tmp3, tmp2, slow_path);
|
||||
bs_asm->try_resolve_weak_handle(this, tmp3, tmp2, slow_path);
|
||||
cmpd(CR0, tmp3, obj);
|
||||
bne(CR0, slow_path);
|
||||
|
||||
|
||||
@ -2412,10 +2412,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return whether or not this register is ever used as an argument. This
|
||||
// function is used on startup to build the trampoline stubs in generateOptoStub.
|
||||
// Registers not mentioned will be killed by the VM call in the trampoline, and
|
||||
// arguments in those registers not be available to the callee.
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
bool Matcher::can_be_java_arg(int reg) {
|
||||
// We must include the virtual halves in order to get STDs and LDs
|
||||
// instead of STWs and LWs in the trampoline stubs.
|
||||
@ -2447,10 +2445,7 @@ bool Matcher::can_be_java_arg(int reg) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg(int reg) {
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
@ -3715,13 +3710,6 @@ frame %{
|
||||
// Compiled code's Frame Pointer.
|
||||
frame_pointer(R1); // R1_SP
|
||||
|
||||
// Interpreter stores its frame pointer in a register which is
|
||||
// stored to the stack by I2CAdaptors. I2CAdaptors convert from
|
||||
// interpreted java to compiled java.
|
||||
//
|
||||
// R14_state holds pointer to caller's cInterpreter.
|
||||
interpreter_frame_pointer(R14); // R14_state
|
||||
|
||||
stack_alignment(frame::alignment_in_bytes);
|
||||
|
||||
// Number of outgoing stack slots killed above the
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -475,19 +475,12 @@ void VM_Version::print_features() {
|
||||
|
||||
void VM_Version::determine_features() {
|
||||
#if defined(ABI_ELFv2)
|
||||
// 1 InstWord per call for the blr instruction.
|
||||
const int code_size = (num_features+1+2*1)*BytesPerInstWord;
|
||||
const int code_size = (num_features + 1 /*blr*/) * BytesPerInstWord;
|
||||
#else
|
||||
// 7 InstWords for each call (function descriptor + blr instruction).
|
||||
const int code_size = (num_features+1+2*7)*BytesPerInstWord;
|
||||
const int code_size = (num_features + 1 /*blr*/ + 6 /* fd */) * BytesPerInstWord;
|
||||
#endif
|
||||
int features = 0;
|
||||
|
||||
// create test area
|
||||
enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
|
||||
char test_area[BUFFER_SIZE];
|
||||
char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];
|
||||
|
||||
// Allocate space for the code.
|
||||
ResourceMark rm;
|
||||
CodeBuffer cb("detect_cpu_features", code_size, 0);
|
||||
@ -497,20 +490,13 @@ void VM_Version::determine_features() {
|
||||
_features = VM_Version::all_features_m;
|
||||
|
||||
// Emit code.
|
||||
void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
|
||||
void (*test)() = (void(*)())(void *)a->function_entry();
|
||||
uint32_t *code = (uint32_t *)a->pc();
|
||||
// Keep R3_ARG1 unmodified, it contains &field (see below).
|
||||
// Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
|
||||
a->mfdscr(R0);
|
||||
a->darn(R7);
|
||||
a->brw(R5, R6);
|
||||
a->blr();
|
||||
|
||||
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
|
||||
void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
|
||||
a->dcbz(R3_ARG1); // R3_ARG1 = addr
|
||||
a->blr();
|
||||
|
||||
uint32_t *code_end = (uint32_t *)a->pc();
|
||||
a->flush();
|
||||
_features = VM_Version::unknown_m;
|
||||
@ -522,18 +508,9 @@ void VM_Version::determine_features() {
|
||||
Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
|
||||
}
|
||||
|
||||
// Measure cache line size.
|
||||
memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
|
||||
(*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
|
||||
int count = 0; // count zeroed bytes
|
||||
for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
|
||||
guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
|
||||
_L1_data_cache_line_size = count;
|
||||
|
||||
// Execute code. Illegal instructions will be replaced by 0 in the signal handler.
|
||||
VM_Version::_is_determine_features_test_running = true;
|
||||
// We must align the first argument to 16 bytes because of the lqarx check.
|
||||
(*test)(align_up((address)mid_of_test_area, 16), 0);
|
||||
(*test)();
|
||||
VM_Version::_is_determine_features_test_running = false;
|
||||
|
||||
// determine which instructions are legal.
|
||||
@ -550,6 +527,10 @@ void VM_Version::determine_features() {
|
||||
}
|
||||
|
||||
_features = features;
|
||||
|
||||
_L1_data_cache_line_size = VM_Version::get_dcache_line_size();
|
||||
assert(_L1_data_cache_line_size >= DEFAULT_CACHE_LINE_SIZE,
|
||||
"processors with smaller cache line size are no longer supported");
|
||||
}
|
||||
|
||||
// Power 8: Configure Data Stream Control Register.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2025 SAP SE. All rights reserved.
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -81,6 +81,9 @@ public:
|
||||
static uint64_t _dscr_val;
|
||||
|
||||
static void initialize_cpu_information(void);
|
||||
|
||||
static int get_dcache_line_size();
|
||||
static int get_icache_line_size();
|
||||
};
|
||||
|
||||
#endif // CPU_PPC_VM_VERSION_PPC_HPP
|
||||
|
||||
@ -42,7 +42,6 @@ define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 1500 );
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 933 );
|
||||
define_pd_global(intx, NewSizeThreadIncrease, 4*K );
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
|
||||
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
|
||||
|
||||
@ -47,7 +47,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3);
|
||||
define_pd_global(intx, FreqInlineSize, 325);
|
||||
define_pd_global(intx, MinJumpTableSize, 10);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16);
|
||||
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
define_pd_global(intx, LoopPercentProfileLimit, 10);
|
||||
// InitialCodeCacheSize derived from specjbb2000 run.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -140,10 +140,10 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
bool should_save_return_value = !_needs_return_buffer;
|
||||
RegSpiller out_reg_spiller(_output_registers);
|
||||
int spill_offset = -1;
|
||||
int out_spill_offset = -1;
|
||||
|
||||
if (should_save_return_value) {
|
||||
spill_offset = 0;
|
||||
out_spill_offset = 0;
|
||||
// spill area can be shared with shadow space and out args,
|
||||
// since they are only used before the call,
|
||||
// and spill area is only used after.
|
||||
@ -168,6 +168,9 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
// FP-> | |
|
||||
// |---------------------| = frame_bottom_offset = frame_size
|
||||
// | (optional) |
|
||||
// | in_reg_spiller area |
|
||||
// |---------------------|
|
||||
// | (optional) |
|
||||
// | capture state buf |
|
||||
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
|
||||
// | (optional) |
|
||||
@ -181,6 +184,18 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
GrowableArray<VMStorage> out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
|
||||
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
|
||||
|
||||
// Need to spill for state capturing runtime call.
|
||||
// The area spilled into is distinct from the capture state buffer.
|
||||
RegSpiller in_reg_spiller(out_regs);
|
||||
int in_spill_offset = -1;
|
||||
if (_captured_state_mask != 0) {
|
||||
// The spill area cannot be shared with the out_spill since
|
||||
// spilling needs to happen before the call. Allocate a new
|
||||
// region in the stack for this spill space.
|
||||
in_spill_offset = allocated_frame_size;
|
||||
allocated_frame_size += in_reg_spiller.spill_size_bytes();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
LogTarget(Trace, foreign, downcall) lt;
|
||||
if (lt.is_enabled()) {
|
||||
@ -226,6 +241,20 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
|
||||
__ block_comment("} argument shuffle");
|
||||
|
||||
if (_captured_state_mask != 0) {
|
||||
assert(in_spill_offset != -1, "must be");
|
||||
__ block_comment("{ load initial thread local");
|
||||
in_reg_spiller.generate_spill(_masm, in_spill_offset);
|
||||
|
||||
// Copy the contents of the capture state buffer into thread local
|
||||
__ ld(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ mv(c_rarg1, _captured_state_mask);
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre));
|
||||
|
||||
in_reg_spiller.generate_fill(_masm, in_spill_offset);
|
||||
__ block_comment("} load initial thread local");
|
||||
}
|
||||
|
||||
__ jalr(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
|
||||
// this call is assumed not to have killed xthread
|
||||
|
||||
@ -254,15 +283,15 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ block_comment("{ save thread local");
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ ld(c_rarg0, Address(sp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ mv(c_rarg1, _captured_state_mask);
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state));
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ block_comment("} save thread local");
|
||||
@ -319,7 +348,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
if (should_save_return_value) {
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ mv(c_rarg0, xthread);
|
||||
@ -327,7 +356,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
__ j(L_after_safepoint_poll);
|
||||
__ block_comment("} L_safepoint_poll_slow_path");
|
||||
@ -339,13 +368,13 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
if (should_save_return_value) {
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
}
|
||||
|
||||
__ j(L_after_reguard);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||
// Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
@ -2060,11 +2060,8 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
// This function is used on startup to build the trampoline stubs in
|
||||
// generateOptoStub. Registers not mentioned will be killed by the VM
|
||||
// call in the trampoline, and arguments in those registers not be
|
||||
// available to the callee.
|
||||
bool Matcher::can_be_java_arg(int reg)
|
||||
{
|
||||
return
|
||||
@ -2085,11 +2082,7 @@ bool Matcher::can_be_java_arg(int reg)
|
||||
reg == F16_num || reg == F16_H_num ||
|
||||
reg == F17_num || reg == F17_H_num;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg(int reg)
|
||||
{
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
@ -2274,7 +2267,7 @@ encode %{
|
||||
} else if (rtype == relocInfo::metadata_type) {
|
||||
__ mov_metadata(dst_reg, (Metadata*)con);
|
||||
} else {
|
||||
assert(rtype == relocInfo::none, "unexpected reloc type");
|
||||
assert(rtype == relocInfo::none || rtype == relocInfo::external_word_type, "unexpected reloc type");
|
||||
__ mv(dst_reg, $src$$constant);
|
||||
}
|
||||
}
|
||||
@ -2559,11 +2552,6 @@ frame %{
|
||||
// Compiled code's Frame Pointer
|
||||
frame_pointer(R2);
|
||||
|
||||
// Interpreter stores its frame pointer in a register which is
|
||||
// stored to the stack by I2CAdaptors.
|
||||
// I2CAdaptors convert from interpreted java to compiled java.
|
||||
interpreter_frame_pointer(R8);
|
||||
|
||||
// Stack alignment requirement
|
||||
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
|
||||
|
||||
|
||||
@ -52,7 +52,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
|
||||
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
|
||||
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
|
||||
define_pd_global(bool, NeverActAsServerClassMachine, true);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
#endif // !COMPILER2
|
||||
|
||||
|
||||
@ -46,7 +46,6 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
|
||||
define_pd_global(intx, ConditionalMoveLimit, 4);
|
||||
define_pd_global(intx, FreqInlineSize, 325);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 4);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, RegisterCostAreaRatio, 12000);
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
define_pd_global(intx, LoopPercentProfileLimit, 10);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, Red Hat, Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -129,7 +129,7 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
|
||||
assert(!_needs_return_buffer, "unexpected needs_return_buffer");
|
||||
RegSpiller out_reg_spiller(_output_registers);
|
||||
int spill_offset = allocated_frame_size;
|
||||
int out_spill_offset = allocated_frame_size;
|
||||
allocated_frame_size += BytesPerWord;
|
||||
|
||||
StubLocations locs;
|
||||
@ -153,6 +153,18 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
GrowableArray<VMStorage> out_regs = ForeignGlobals::replace_place_holders(_input_registers, locs);
|
||||
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, _abi._scratch1);
|
||||
|
||||
// Need to spill for state capturing runtime call.
|
||||
// The area spilled into is distinct from the capture state buffer.
|
||||
RegSpiller in_reg_spiller(out_regs);
|
||||
int in_spill_offset = -1;
|
||||
if (_captured_state_mask != 0) {
|
||||
// The spill area cannot be shared with the out_spill since
|
||||
// spilling needs to happen before the call. Allocate a new
|
||||
// region in the stack for this spill space.
|
||||
in_spill_offset = allocated_frame_size;
|
||||
allocated_frame_size += in_reg_spiller.spill_size_bytes();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
LogTarget(Trace, foreign, downcall) lt;
|
||||
if (lt.is_enabled()) {
|
||||
@ -192,6 +204,21 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
arg_shuffle.generate(_masm, shuffle_reg, frame::z_jit_out_preserve_size, _abi._shadow_space_bytes);
|
||||
__ block_comment("} argument_shuffle");
|
||||
|
||||
if (_captured_state_mask != 0) {
|
||||
assert(in_spill_offset != -1, "must be");
|
||||
__ block_comment("{ load initial thread local");
|
||||
in_reg_spiller.generate_spill(_masm, in_spill_offset);
|
||||
|
||||
// Copy the contents of the capture state buffer into thread local
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_pre));
|
||||
__ z_lg(Z_ARG1, Address(Z_SP, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ load_const_optimized(Z_ARG2, _captured_state_mask);
|
||||
__ call(call_target_address);
|
||||
|
||||
in_reg_spiller.generate_fill(_masm, in_spill_offset);
|
||||
__ block_comment("} load initial thread local");
|
||||
}
|
||||
|
||||
__ call(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -199,14 +226,14 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
if (_captured_state_mask != 0) {
|
||||
__ block_comment("save_thread_local {");
|
||||
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state));
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, DowncallLinker::capture_state_post));
|
||||
__ z_lg(Z_ARG1, Address(Z_SP, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ load_const_optimized(Z_ARG2, _captured_state_mask);
|
||||
__ call(call_target_address);
|
||||
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
|
||||
__ block_comment("} save_thread_local");
|
||||
}
|
||||
@ -259,13 +286,13 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_safepoint_poll_slow_path);
|
||||
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, JavaThread::check_special_condition_for_native_trans));
|
||||
__ z_lgr(Z_ARG1, Z_thread);
|
||||
__ call(call_target_address);
|
||||
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
|
||||
__ z_bru(L_after_safepoint_poll);
|
||||
__ block_comment("} L_safepoint_poll_slow_path");
|
||||
@ -275,12 +302,12 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_reguard);
|
||||
|
||||
// Need to save the native result registers around any runtime calls.
|
||||
out_reg_spiller.generate_spill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_offset);
|
||||
|
||||
__ load_const_optimized(call_target_address, CAST_FROM_FN_PTR(uint64_t, SharedRuntime::reguard_yellow_pages));
|
||||
__ call(call_target_address);
|
||||
|
||||
out_reg_spiller.generate_fill(_masm, spill_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_offset);
|
||||
|
||||
__ z_bru(L_after_reguard);
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2017, 2024 SAP SE. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
@ -1890,10 +1890,8 @@ const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
|
||||
|
||||
const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
|
||||
|
||||
// Return whether or not this register is ever used as an argument. This
|
||||
// function is used on startup to build the trampoline stubs in generateOptoStub.
|
||||
// Registers not mentioned will be killed by the VM call in the trampoline, and
|
||||
// arguments in those registers not be available to the callee.
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
bool Matcher::can_be_java_arg(int reg) {
|
||||
// We return true for all registers contained in z_iarg_reg[] and
|
||||
// z_farg_reg[] and their virtual halves.
|
||||
@ -1917,10 +1915,7 @@ bool Matcher::can_be_java_arg(int reg) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg(int reg) {
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
@ -2606,13 +2601,6 @@ frame %{
|
||||
// z/Architecture stack pointer
|
||||
frame_pointer(Z_R15); // Z_SP
|
||||
|
||||
// Interpreter stores its frame pointer in a register which is
|
||||
// stored to the stack by I2CAdaptors. I2CAdaptors convert from
|
||||
// interpreted java to compiled java.
|
||||
//
|
||||
// Z_state holds pointer to caller's cInterpreter.
|
||||
interpreter_frame_pointer(Z_R7); // Z_state
|
||||
|
||||
// Use alignment_in_bytes instead of log_2_of_alignment_in_bits.
|
||||
stack_alignment(frame::alignment_in_bytes);
|
||||
|
||||
|
||||
@ -5442,6 +5442,13 @@ void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
|
||||
emit_int16(0x23, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of PMOVZXWD (66 0F 38 33 /r), which
// zero-extends packed 16-bit words from src into 32-bit doublewords in dst.
// The instruction needs SSE4.1, which is asserted below.
void Assembler::pmovzxwd(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sse4_1(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
// No second source operand for this instruction, hence xnoreg.
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
// Opcode byte 0x33 followed by a register-direct ModRM byte (top two bits set via 0xC0).
emit_int16(0x33, (0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionMark im(this);
|
||||
|
||||
@ -1965,6 +1965,7 @@ private:
|
||||
void pmovsxbq(XMMRegister dst, XMMRegister src);
|
||||
void pmovsxbw(XMMRegister dst, XMMRegister src);
|
||||
void pmovsxwd(XMMRegister dst, XMMRegister src);
|
||||
void pmovzxwd(XMMRegister dst, XMMRegister src);
|
||||
void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
@ -41,7 +41,6 @@ define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 1500 );
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 933 );
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
define_pd_global(size_t, ReservedCodeCacheSize, 32*M );
|
||||
define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M );
|
||||
|
||||
@ -1729,6 +1729,24 @@ void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegis
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_UMinReductionV:
|
||||
switch (typ) {
|
||||
case T_BYTE: vpminub(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_SHORT: vpminuw(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_INT: vpminud(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_LONG: evpminuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break;
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_UMaxReductionV:
|
||||
switch (typ) {
|
||||
case T_BYTE: vpmaxub(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_SHORT: vpmaxuw(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_INT: vpmaxud(dst, dst, src, Assembler::AVX_128bit); break;
|
||||
case T_LONG: evpmaxuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break;
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_AddReductionVF: addss(dst, src); break;
|
||||
case Op_AddReductionVD: addsd(dst, src); break;
|
||||
case Op_AddReductionVI:
|
||||
@ -1792,6 +1810,24 @@ void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegis
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_UMinReductionV:
|
||||
switch (typ) {
|
||||
case T_BYTE: vpminub(dst, src1, src2, vector_len); break;
|
||||
case T_SHORT: vpminuw(dst, src1, src2, vector_len); break;
|
||||
case T_INT: vpminud(dst, src1, src2, vector_len); break;
|
||||
case T_LONG: evpminuq(dst, k0, src1, src2, true, vector_len); break;
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_UMaxReductionV:
|
||||
switch (typ) {
|
||||
case T_BYTE: vpmaxub(dst, src1, src2, vector_len); break;
|
||||
case T_SHORT: vpmaxuw(dst, src1, src2, vector_len); break;
|
||||
case T_INT: vpmaxud(dst, src1, src2, vector_len); break;
|
||||
case T_LONG: evpmaxuq(dst, k0, src1, src2, true, vector_len); break;
|
||||
default: assert(false, "wrong type");
|
||||
}
|
||||
break;
|
||||
case Op_AddReductionVI:
|
||||
switch (typ) {
|
||||
case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
|
||||
@ -2058,7 +2094,11 @@ void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMReg
|
||||
psrldq(vtmp2, 1);
|
||||
reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
|
||||
movdl(vtmp2, src1);
|
||||
pmovsxbd(vtmp1, vtmp1);
|
||||
if (opcode == Op_UMinReductionV || opcode == Op_UMaxReductionV) {
|
||||
pmovzxbd(vtmp1, vtmp1);
|
||||
} else {
|
||||
pmovsxbd(vtmp1, vtmp1);
|
||||
}
|
||||
reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
|
||||
pextrb(dst, vtmp1, 0x0);
|
||||
movsbl(dst, dst);
|
||||
@ -2135,7 +2175,11 @@ void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMReg
|
||||
reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2);
|
||||
}
|
||||
movdl(vtmp2, src1);
|
||||
pmovsxwd(vtmp1, vtmp1);
|
||||
if (opcode == Op_UMinReductionV || opcode == Op_UMaxReductionV) {
|
||||
pmovzxwd(vtmp1, vtmp1);
|
||||
} else {
|
||||
pmovsxwd(vtmp1, vtmp1);
|
||||
}
|
||||
reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
|
||||
pextrw(dst, vtmp1, 0x0);
|
||||
movswl(dst, dst);
|
||||
|
||||
@ -46,7 +46,6 @@ define_pd_global(intx, FreqInlineSize, 325);
|
||||
define_pd_global(intx, MinJumpTableSize, 10);
|
||||
define_pd_global(intx, LoopPercentProfileLimit, 10);
|
||||
define_pd_global(intx, InteriorEntryAlignment, 16);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
||||
define_pd_global(intx, LoopUnrollLimit, 60);
|
||||
// InitialCodeCacheSize derived from specjbb2000 run.
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -145,10 +145,10 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
// when we don't use a return buffer we need to spill the return value around our slow path calls
|
||||
bool should_save_return_value = !_needs_return_buffer;
|
||||
RegSpiller out_reg_spiller(_output_registers);
|
||||
int spill_rsp_offset = -1;
|
||||
int out_spill_rsp_offset = -1;
|
||||
|
||||
if (should_save_return_value) {
|
||||
spill_rsp_offset = 0;
|
||||
out_spill_rsp_offset = 0;
|
||||
// spill area can be shared with shadow space and out args,
|
||||
// since they are only used before the call,
|
||||
// and spill area is only used after.
|
||||
@ -173,6 +173,9 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
// FP-> | |
|
||||
// |---------------------| = frame_bottom_offset = frame_size
|
||||
// | (optional) |
|
||||
// | in_reg_spiller area |
|
||||
// |---------------------|
|
||||
// | (optional) |
|
||||
// | capture state buf |
|
||||
// |---------------------| = StubLocations::CAPTURED_STATE_BUFFER
|
||||
// | (optional) |
|
||||
@ -188,6 +191,18 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
VMStorage shuffle_reg = as_VMStorage(rbx);
|
||||
ArgumentShuffle arg_shuffle(filtered_java_regs, out_regs, shuffle_reg);
|
||||
|
||||
// Need to spill for state capturing runtime call.
|
||||
// The area spilled into is distinct from the capture state buffer.
|
||||
RegSpiller in_reg_spiller(out_regs);
|
||||
int in_spill_rsp_offset = -1;
|
||||
if (_captured_state_mask != 0) {
|
||||
// The spill area cannot be shared with the shadow/out args space
|
||||
// since spilling needs to happen before the call. Allocate a new
|
||||
// region in the stack for this spill space.
|
||||
in_spill_rsp_offset = allocated_frame_size;
|
||||
allocated_frame_size += in_reg_spiller.spill_size_bytes();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
LogTarget(Trace, foreign, downcall) lt;
|
||||
if (lt.is_enabled()) {
|
||||
@ -232,6 +247,19 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
arg_shuffle.generate(_masm, shuffle_reg, 0, _abi._shadow_space_bytes);
|
||||
__ block_comment("} argument shuffle");
|
||||
|
||||
if (_captured_state_mask != 0) {
|
||||
assert(in_spill_rsp_offset != -1, "must be");
|
||||
__ block_comment("{ load initial thread local");
|
||||
in_reg_spiller.generate_spill(_masm, in_spill_rsp_offset);
|
||||
|
||||
// Copy the contents of the capture state buffer into thread local
|
||||
__ movptr(c_rarg0, Address(rsp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ movl(c_rarg1, _captured_state_mask);
|
||||
runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_pre));
|
||||
|
||||
in_reg_spiller.generate_fill(_masm, in_spill_rsp_offset);
|
||||
__ block_comment("} load initial thread local");
|
||||
}
|
||||
__ call(as_Register(locs.get(StubLocations::TARGET_ADDRESS)));
|
||||
assert(!_abi.is_volatile_reg(r15_thread), "Call assumed not to kill r15");
|
||||
|
||||
@ -258,15 +286,15 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ block_comment("{ save thread local");
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
__ movptr(c_rarg0, Address(rsp, locs.data_offset(StubLocations::CAPTURED_STATE_BUFFER)));
|
||||
__ movl(c_rarg1, _captured_state_mask);
|
||||
runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state));
|
||||
runtime_call(_masm, CAST_FROM_FN_PTR(address, DowncallLinker::capture_state_post));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
__ block_comment("} save thread local");
|
||||
@ -319,14 +347,14 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_safepoint_poll_slow_path);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
__ mov(c_rarg0, r15_thread);
|
||||
runtime_call(_masm, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
__ jmp(L_after_safepoint_poll);
|
||||
@ -338,13 +366,13 @@ void DowncallLinker::StubGenerator::generate() {
|
||||
__ bind(L_reguard);
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_spill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_spill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
runtime_call(_masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
|
||||
|
||||
if (should_save_return_value) {
|
||||
out_reg_spiller.generate_fill(_masm, spill_rsp_offset);
|
||||
out_reg_spiller.generate_fill(_masm, out_spill_rsp_offset);
|
||||
}
|
||||
|
||||
__ jmp(L_after_reguard);
|
||||
|
||||
@ -961,7 +961,7 @@ void MacroAssembler::call(AddressLiteral entry, Register rscratch) {
|
||||
void MacroAssembler::ic_call(address entry, jint method_index) {
|
||||
RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
|
||||
// Needs full 64-bit immediate for later patching.
|
||||
mov64(rax, (int64_t)Universe::non_oop_word());
|
||||
Assembler::mov64(rax, (int64_t)Universe::non_oop_word());
|
||||
call(AddressLiteral(entry, rh));
|
||||
}
|
||||
|
||||
@ -1961,6 +1961,20 @@ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src, Register rscrat
|
||||
}
|
||||
}
|
||||
|
||||
// Loads the 64-bit constant imm64 into dst, picking a shorter instruction
// when the value allows it:
//   - value fits in an unsigned 32-bit immediate -> movl
//   - value fits in a signed 32-bit immediate   -> movq with a 32-bit immediate
//   - anything else                             -> Assembler::mov64 (full 64-bit immediate)
// NOTE(review): the emitted instruction therefore depends on the value; code
// that must always carry a full 64-bit immediate should call
// Assembler::mov64 directly.
void MacroAssembler::mov64(Register dst, int64_t imm64) {
|
||||
if (is_uimm32(imm64)) {
|
||||
movl(dst, checked_cast<uint32_t>(imm64));
|
||||
} else if (is_simm32(imm64)) {
|
||||
movq(dst, checked_cast<int32_t>(imm64));
|
||||
} else {
|
||||
Assembler::mov64(dst, imm64);
|
||||
}
|
||||
}
|
||||
|
||||
// Relocated variant: forwards all arguments unchanged to Assembler::mov64.
// Unlike the two-argument overload, no shorter encoding is selected here.
void MacroAssembler::mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format) {
|
||||
Assembler::mov64(dst, imm64, rtype, format);
|
||||
}
|
||||
|
||||
void MacroAssembler::movptr(Register dst, Register src) {
|
||||
movq(dst, src);
|
||||
}
|
||||
@ -1971,13 +1985,7 @@ void MacroAssembler::movptr(Register dst, Address src) {
|
||||
|
||||
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
|
||||
void MacroAssembler::movptr(Register dst, intptr_t src) {
|
||||
if (is_uimm32(src)) {
|
||||
movl(dst, checked_cast<uint32_t>(src));
|
||||
} else if (is_simm32(src)) {
|
||||
movq(dst, checked_cast<int32_t>(src));
|
||||
} else {
|
||||
mov64(dst, src);
|
||||
}
|
||||
mov64(dst, src);
|
||||
}
|
||||
|
||||
void MacroAssembler::movptr(Address dst, Register src) {
|
||||
|
||||
@ -1869,6 +1869,9 @@ public:
|
||||
void mov_metadata(Register dst, Metadata* obj);
|
||||
void mov_metadata(Address dst, Metadata* obj, Register rscratch);
|
||||
|
||||
void mov64(Register dst, int64_t imm64);
|
||||
void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
|
||||
|
||||
void movptr(Register dst, Register src);
|
||||
void movptr(Register dst, Address src);
|
||||
void movptr(Register dst, AddressLiteral src);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -330,6 +330,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
|
||||
|
||||
// Shared implementation for ECB/AES Encrypt and Decrypt, which does 4 blocks
|
||||
// in a loop at a time to hide instruction latency. Set is_encrypt=true for
|
||||
// encryption, false for decryption.
|
||||
address generate_electronicCodeBook_AESCrypt_Parallel(bool is_encrypt);
|
||||
|
||||
// A version of ECB/AES Encrypt which does 4 blocks in a loop at a time
|
||||
// to hide instruction latency
|
||||
address generate_electronicCodeBook_encryptAESCrypt_Parallel();
|
||||
|
||||
// A version of ECB/AES Decrypt which does 4 blocks in a loop at a time
|
||||
// to hide instruction latency
|
||||
address generate_electronicCodeBook_decryptAESCrypt_Parallel();
|
||||
|
||||
// Vector AES Galois Counter Mode implementation
|
||||
address generate_galoisCounterMode_AESCrypt();
|
||||
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2025, Intel Corporation. All rights reserved.
|
||||
* Copyright (c) 2019, 2026, Intel Corporation. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -218,6 +218,8 @@ void StubGenerator::generate_aes_stubs() {
|
||||
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
|
||||
} else {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt_Parallel();
|
||||
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt_Parallel();
|
||||
if (VM_Version::supports_avx2()) {
|
||||
StubRoutines::_galoisCounterMode_AESCrypt = generate_avx2_galoisCounterMode_AESCrypt();
|
||||
}
|
||||
@ -1471,6 +1473,200 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
return start;
|
||||
}
|
||||
|
||||
// This is a version of ECB/AES Encrypt/Decrypt which does 4 blocks in a loop
|
||||
// at a time to hide instruction latency.
|
||||
//
|
||||
// For encryption (is_encrypt=true):
|
||||
// pxor key[0], aesenc key[1..rounds-1], aesenclast key[rounds]
|
||||
// For decryption (is_encrypt=false):
|
||||
// pxor key[1], aesdec key[2..rounds], aesdeclast key[0]
|
||||
//
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - session key (Ke/Kd) in little endian int array
|
||||
// c_rarg3 - input length (must be multiple of blocksize 16)
|
||||
//
|
||||
// Output:
|
||||
// rax - input length
|
||||
//
|
||||
// Generates the ECB/AES stub shared by encryption and decryption and returns
// the address of its first instruction. is_encrypt selects both the stub id
// and the round sequence that is emitted (see the per-branch comments below).
address StubGenerator::generate_electronicCodeBook_AESCrypt_Parallel(bool is_encrypt) {
|
||||
assert(UseAES, "need AES instructions and misaligned SSE support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = is_encrypt ? StubId::stubgen_electronicCodeBook_encryptAESCrypt_id
|
||||
: StubId::stubgen_electronicCodeBook_decryptAESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
address start = __ pc();
|
||||
|
||||
const Register from = c_rarg0; // source array address
|
||||
const Register to = c_rarg1; // destination array address
|
||||
const Register key = c_rarg2; // key array address
|
||||
const Register len_reg = c_rarg3; // src len (must be multiple of blocksize 16)
|
||||
// pos is the running byte offset into both from and to; it is zeroed before the loops.
const Register pos = rax;
|
||||
const Register keylen = r11;
|
||||
|
||||
// Four data blocks are processed per iteration of the main loop.
const XMMRegister xmm_result0 = xmm0;
|
||||
const XMMRegister xmm_result1 = xmm1;
|
||||
const XMMRegister xmm_result2 = xmm2;
|
||||
const XMMRegister xmm_result3 = xmm3;
|
||||
const XMMRegister xmm_key_shuf_mask = xmm4;
|
||||
// Scratch register for round keys that are not kept resident (index 10 and above).
const XMMRegister xmm_key_tmp = xmm5;
|
||||
// keys 0-9 pre-loaded into xmm6-xmm15
|
||||
const int XMM_REG_NUM_KEY_FIRST = 6;
|
||||
const int XMM_REG_NUM_KEY_LAST = 15;
|
||||
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
|
||||
|
||||
// for key_128, key_192, key_256
|
||||
const int ROUNDS[3] = {10, 12, 14};
|
||||
|
||||
Label L_exit;
|
||||
// One set of labels per key-size variant k (0: key_128, 1: key_192, 2: key_256).
Label L_loop4[3], L_single[3], L_done[3];
|
||||
|
||||
// Drop any definitions of these helper macros that are still active at this point.
#ifdef DoFour
|
||||
#undef DoFour
|
||||
#endif
|
||||
#ifdef DoOne
|
||||
#undef DoOne
|
||||
#endif
|
||||
|
||||
#define DoFour(opc, reg) \
|
||||
__ opc(xmm_result0, reg); \
|
||||
__ opc(xmm_result1, reg); \
|
||||
__ opc(xmm_result2, reg); \
|
||||
__ opc(xmm_result3, reg);
|
||||
|
||||
#define DoOne(opc, reg) \
|
||||
__ opc(xmm_result0, reg);
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ push(len_reg); // save original length for return value
|
||||
|
||||
// keylen = length field of the key array, addressed relative to the first int element.
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), r10 /*rscratch*/);
|
||||
// load up xmm regs 6 thru 15 with keys 0x00 - 0x90
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++, offset += 0x10) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
}
|
||||
__ xorptr(pos, pos);
|
||||
|
||||
// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
// Any value other than 52 or 60 falls through into the k == 0 (key_128) code below.
__ cmpl(keylen, 52);
|
||||
__ jcc(Assembler::equal, L_loop4[1]);
|
||||
__ cmpl(keylen, 60);
|
||||
__ jcc(Assembler::equal, L_loop4[2]);
|
||||
|
||||
// k == 0: generate code for key_128
|
||||
// k == 1: generate code for key_192
|
||||
// k == 2: generate code for key_256
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_loop4[k]);
|
||||
// Fewer than four blocks left: continue with the one-block-at-a-time loop.
__ cmpptr(len_reg, 4 * AESBlockSize);
|
||||
__ jcc(Assembler::less, L_single[k]);
|
||||
|
||||
__ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
|
||||
__ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
|
||||
__ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
|
||||
__ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
|
||||
|
||||
if (is_encrypt) {
|
||||
// Encrypt: xor with key[0], aesenc with key[1..ROUNDS[k]-1], aesenclast with key[ROUNDS[k]].
DoFour(pxor, xmm_key_first);
|
||||
for (int rnum = 1; rnum < 10; rnum++) {
|
||||
DoFour(aesenc, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
|
||||
}
|
||||
// Round keys from index 10 up are not resident and are reloaded from memory each iteration.
for (int i = 10; i < ROUNDS[k]; i++) {
|
||||
load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
|
||||
DoFour(aesenc, xmm_key_tmp);
|
||||
}
|
||||
load_key(xmm_key_tmp, key, ROUNDS[k] * 0x10, xmm_key_shuf_mask);
|
||||
DoFour(aesenclast, xmm_key_tmp);
|
||||
} else {
|
||||
// Decrypt: xor with key[1], aesdec with key[2..ROUNDS[k]], aesdeclast with key[0].
DoFour(pxor, as_XMMRegister(1 + XMM_REG_NUM_KEY_FIRST));
|
||||
for (int rnum = 2; rnum < 10; rnum++) {
|
||||
DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
|
||||
}
|
||||
for (int i = 10; i <= ROUNDS[k]; i++) {
|
||||
load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
|
||||
DoFour(aesdec, xmm_key_tmp);
|
||||
}
|
||||
DoFour(aesdeclast, xmm_key_first);
|
||||
}
|
||||
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
|
||||
__ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
|
||||
__ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
|
||||
__ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
|
||||
|
||||
__ addptr(pos, 4 * AESBlockSize);
|
||||
__ subptr(len_reg, 4 * AESBlockSize);
|
||||
__ jmp(L_loop4[k]);
|
||||
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_single[k]);
|
||||
// Tail loop: same round sequence as above, applied to one block per iteration.
__ cmpptr(len_reg, AESBlockSize);
|
||||
__ jcc(Assembler::less, L_done[k]);
|
||||
|
||||
__ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0));
|
||||
|
||||
if (is_encrypt) {
|
||||
DoOne(pxor, xmm_key_first);
|
||||
for (int rnum = 1; rnum < 10; rnum++) {
|
||||
DoOne(aesenc, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
|
||||
}
|
||||
for (int i = 10; i < ROUNDS[k]; i++) {
|
||||
load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
|
||||
DoOne(aesenc, xmm_key_tmp);
|
||||
}
|
||||
load_key(xmm_key_tmp, key, ROUNDS[k] * 0x10, xmm_key_shuf_mask);
|
||||
DoOne(aesenclast, xmm_key_tmp);
|
||||
} else {
|
||||
DoOne(pxor, as_XMMRegister(1 + XMM_REG_NUM_KEY_FIRST));
|
||||
for (int rnum = 2; rnum < 10; rnum++) {
|
||||
DoOne(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
|
||||
}
|
||||
for (int i = 10; i <= ROUNDS[k]; i++) {
|
||||
load_key(xmm_key_tmp, key, i * 0x10, xmm_key_shuf_mask);
|
||||
DoOne(aesdec, xmm_key_tmp);
|
||||
}
|
||||
DoOne(aesdeclast, xmm_key_first);
|
||||
}
|
||||
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0);
|
||||
__ addptr(pos, AESBlockSize);
|
||||
__ subptr(len_reg, AESBlockSize);
|
||||
__ jmp(L_single[k]);
|
||||
|
||||
__ BIND(L_done[k]);
|
||||
// The last variant (k == 2) needs no jump: L_exit is bound right after the loop.
if (k < 2) __ jmp(L_exit);
|
||||
} //for key_128/192/256
|
||||
|
||||
__ BIND(L_exit);
|
||||
// Clear all XMM registers holding sensitive key material before returning
|
||||
__ pxor(xmm_key_tmp, xmm_key_tmp);
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
__ pxor(as_XMMRegister(rnum), as_XMMRegister(rnum));
|
||||
}
|
||||
// Return value: the original length that was pushed on entry.
__ pop(rax);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
#undef DoFour
|
||||
#undef DoOne
|
||||
}
|
||||
|
||||
// Encryption entry point: delegates to the shared ECB generator with is_encrypt = true.
address StubGenerator::generate_electronicCodeBook_encryptAESCrypt_Parallel() {
|
||||
return generate_electronicCodeBook_AESCrypt_Parallel(true);
|
||||
}
|
||||
|
||||
// Decryption entry point: delegates to the shared ECB generator with is_encrypt = false.
address StubGenerator::generate_electronicCodeBook_decryptAESCrypt_Parallel() {
|
||||
return generate_electronicCodeBook_AESCrypt_Parallel(false);
|
||||
}
|
||||
|
||||
// This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
|
||||
// to hide instruction latency
|
||||
//
|
||||
@ -1571,7 +1767,7 @@ address StubGenerator::generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
|
||||
__ opc(xmm_result0, src_reg); \
|
||||
__ opc(xmm_result1, src_reg); \
|
||||
__ opc(xmm_result2, src_reg); \
|
||||
__ opc(xmm_result3, src_reg); \
|
||||
__ opc(xmm_result3, src_reg);
|
||||
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
__ BIND(L_multiBlock_loopTopHead[k]);
|
||||
|
||||
@ -958,9 +958,17 @@ void VM_Version::get_processor_features() {
|
||||
if (UseSSE < 1)
|
||||
_features.clear_feature(CPU_SSE);
|
||||
|
||||
//since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
|
||||
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
|
||||
UseAVX = 0;
|
||||
// ZX cpus specific settings
|
||||
if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
|
||||
if (cpu_family() == 7) {
|
||||
if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
|
||||
UseAVX = 1;
|
||||
} else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
|
||||
UseAVX = 0;
|
||||
}
|
||||
} else if (cpu_family() == 6) {
|
||||
UseAVX = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// UseSSE is set to the smaller of what hardware supports and what
|
||||
@ -2623,6 +2631,23 @@ const char* VM_Version::cpu_family_description(void) {
|
||||
return _family_id_intel[cpu_family_id];
|
||||
}
|
||||
}
|
||||
if (is_zx()) {
|
||||
int cpu_model_id = extended_cpu_model();
|
||||
if (cpu_family_id == 7) {
|
||||
switch (cpu_model_id) {
|
||||
case 0x1B:
|
||||
return "wudaokou";
|
||||
case 0x3B:
|
||||
return "lujiazui";
|
||||
case 0x5B:
|
||||
return "yongfeng";
|
||||
case 0x6B:
|
||||
return "shijidadao";
|
||||
}
|
||||
} else if (cpu_family_id == 6) {
|
||||
return "zhangjiang";
|
||||
}
|
||||
}
|
||||
if (is_hygon()) {
|
||||
return "Dhyana";
|
||||
}
|
||||
@ -2642,6 +2667,9 @@ int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
|
||||
} else if (is_amd()) {
|
||||
cpu_type = "AMD";
|
||||
x64 = cpu_is_em64t() ? " AMD64" : "";
|
||||
} else if (is_zx()) {
|
||||
cpu_type = "Zhaoxin";
|
||||
x64 = cpu_is_em64t() ? " x86_64" : "";
|
||||
} else if (is_hygon()) {
|
||||
cpu_type = "Hygon";
|
||||
x64 = cpu_is_em64t() ? " AMD64" : "";
|
||||
@ -3259,6 +3287,12 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
|
||||
} else {
|
||||
return 128; // Athlon
|
||||
}
|
||||
} else if (is_zx()) {
|
||||
if (supports_sse2()) {
|
||||
return 256;
|
||||
} else {
|
||||
return 128;
|
||||
}
|
||||
} else { // Intel
|
||||
if (supports_sse3() && is_intel_server_family()) {
|
||||
if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
|
||||
|
||||
@ -828,7 +828,7 @@ public:
|
||||
static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); }
|
||||
static int cpu_family() { return _cpu;}
|
||||
static bool is_P6() { return cpu_family() >= 6; }
|
||||
static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; }
|
||||
static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 18 || cpu_family() == 19; }
|
||||
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
|
||||
static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
|
||||
static bool is_amd_family() { return is_amd() || is_hygon(); }
|
||||
|
||||
@ -2726,11 +2726,8 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
|
||||
return (-128 <= offset && offset <= 127);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Return whether or not this register is ever used as an argument.
|
||||
// This function is used on startup to build the trampoline stubs in
|
||||
// generateOptoStub. Registers not mentioned will be killed by the VM
|
||||
// call in the trampoline, and arguments in those registers not be
|
||||
// available to the callee.
|
||||
bool Matcher::can_be_java_arg(int reg)
|
||||
{
|
||||
return
|
||||
@ -2750,11 +2747,7 @@ bool Matcher::can_be_java_arg(int reg)
|
||||
reg == XMM6_num || reg == XMM6b_num ||
|
||||
reg == XMM7_num || reg == XMM7b_num;
|
||||
}
|
||||
|
||||
bool Matcher::is_spillable_arg(int reg)
|
||||
{
|
||||
return can_be_java_arg(reg);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint Matcher::int_pressure_limit()
|
||||
{
|
||||
@ -3341,6 +3334,18 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_UMinReductionV:
|
||||
case Op_UMaxReductionV:
|
||||
if (UseAVX == 0) {
|
||||
return false;
|
||||
}
|
||||
if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MaxV:
|
||||
case Op_MinV:
|
||||
if (UseSSE < 4 && is_integral_type(bt)) {
|
||||
@ -4679,11 +4684,6 @@ frame
|
||||
// Compiled code's Frame Pointer
|
||||
frame_pointer(RSP);
|
||||
|
||||
// Interpreter stores its frame pointer in a register which is
|
||||
// stored to the stack by I2CAdaptors.
|
||||
// I2CAdaptors convert from interpreted java to compiled java.
|
||||
interpreter_frame_pointer(RBP);
|
||||
|
||||
// Stack alignment requirement
|
||||
stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
|
||||
|
||||
@ -19371,6 +19371,8 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
@ -19392,6 +19394,8 @@ instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
@ -19411,6 +19415,8 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
@ -19639,6 +19645,8 @@ instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
@ -19657,6 +19665,8 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
@ -19678,6 +19688,8 @@ instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm
|
||||
match(Set dst (XorReductionV src1 src2));
|
||||
match(Set dst (MinReductionV src1 src2));
|
||||
match(Set dst (MaxReductionV src1 src2));
|
||||
match(Set dst (UMinReductionV src1 src2));
|
||||
match(Set dst (UMaxReductionV src1 src2));
|
||||
effect(TEMP vtmp1, TEMP vtmp2);
|
||||
format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
|
||||
@ -37,16 +37,6 @@
|
||||
range, \
|
||||
constraint) \
|
||||
\
|
||||
/* Whether to allow the VM to run if EXTSHM=ON. EXTSHM is an environment */ \
|
||||
/* variable used on AIX to activate certain hacks which allow more shm segments */\
|
||||
/* for 32bit processes. For 64bit processes, it is pointless and may have */ \
|
||||
/* harmful side effects (e.g. for some reasonn prevents allocation of 64k pages */\
|
||||
/* via shmctl). */ \
|
||||
/* Per default we quit with an error if that variable is found; for certain */ \
|
||||
/* customer scenarios, we may want to be able to run despite that variable. */ \
|
||||
product(bool, AllowExtshm, false, DIAGNOSTIC, \
|
||||
"Allow VM to run with EXTSHM=ON.") \
|
||||
\
|
||||
/* Maximum expected size of the data segment. That correlates with the */ \
|
||||
/* maximum C Heap consumption we expect. */ \
|
||||
/* We need to leave "breathing space" for the data segment when */ \
|
||||
|
||||
@ -126,7 +126,6 @@ int mread_real_time(timebasestruct_t *t, size_t size_of_timebasestruct_t);
|
||||
|
||||
// for multipage initialization error analysis (in 'g_multipage_error')
|
||||
#define ERROR_MP_OS_TOO_OLD 100
|
||||
#define ERROR_MP_EXTSHM_ACTIVE 101
|
||||
#define ERROR_MP_VMGETINFO_FAILED 102
|
||||
#define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103
|
||||
|
||||
@ -178,9 +177,6 @@ uint32_t os::Aix::_os_version = 0;
|
||||
// -1 = uninitialized, 0 - no, 1 - yes
|
||||
int os::Aix::_xpg_sus_mode = -1;
|
||||
|
||||
// -1 = uninitialized, 0 - no, 1 - yes
|
||||
int os::Aix::_extshm = -1;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// local variables
|
||||
|
||||
@ -1195,13 +1191,6 @@ void os::print_memory_info(outputStream* st) {
|
||||
const char* const ldr_cntrl = ::getenv("LDR_CNTRL");
|
||||
st->print_cr(" LDR_CNTRL=%s.", ldr_cntrl ? ldr_cntrl : "<unset>");
|
||||
|
||||
// Print out EXTSHM because it is an unsupported setting.
|
||||
const char* const extshm = ::getenv("EXTSHM");
|
||||
st->print_cr(" EXTSHM=%s.", extshm ? extshm : "<unset>");
|
||||
if ( (strcmp(extshm, "on") == 0) || (strcmp(extshm, "ON") == 0) ) {
|
||||
st->print_cr(" *** Unsupported! Please remove EXTSHM from your environment! ***");
|
||||
}
|
||||
|
||||
// Print out AIXTHREAD_GUARDPAGES because it affects the size of pthread stacks.
|
||||
const char* const aixthread_guardpages = ::getenv("AIXTHREAD_GUARDPAGES");
|
||||
st->print_cr(" AIXTHREAD_GUARDPAGES=%s.",
|
||||
@ -2133,8 +2122,6 @@ void os::init(void) {
|
||||
|
||||
// datapsize = 64k. Data segment, thread stacks are 64k paged.
|
||||
// This normally means that we can allocate 64k pages dynamically.
|
||||
// (There is one special case where this may be false: EXTSHM=on.
|
||||
// but we decided to not support that mode).
|
||||
assert0(g_multipage_support.can_use_64K_pages || g_multipage_support.can_use_64K_mmap_pages);
|
||||
set_page_size(64*K);
|
||||
|
||||
@ -2543,28 +2530,13 @@ void os::Aix::initialize_os_info() {
|
||||
void os::Aix::scan_environment() {
|
||||
|
||||
char* p;
|
||||
int rc;
|
||||
|
||||
// Warn explicitly if EXTSHM=ON is used. That switch changes how
|
||||
// System V shared memory behaves. One effect is that page size of
|
||||
// shared memory cannot be change dynamically, effectivly preventing
|
||||
// large pages from working.
|
||||
// This switch was needed on AIX 32bit, but on AIX 64bit the general
|
||||
// recommendation is (in OSS notes) to switch it off.
|
||||
// Reject EXTSHM=ON. That switch changes how System V shared memory behaves
|
||||
// and prevents allocation of 64k pages for the heap.
|
||||
p = ::getenv("EXTSHM");
|
||||
trcVerbose("EXTSHM=%s.", p ? p : "<unset>");
|
||||
if (p && strcasecmp(p, "ON") == 0) {
|
||||
_extshm = 1;
|
||||
log_warning(os)("*** Unsupported mode! Please remove EXTSHM from your environment! ***");
|
||||
if (!AllowExtshm) {
|
||||
// We allow under certain conditions the user to continue. However, we want this
|
||||
// to be a fatal error by default. On certain AIX systems, leaving EXTSHM=ON means
|
||||
// that the VM is not able to allocate 64k pages for the heap.
|
||||
// We do not want to run with reduced performance.
|
||||
vm_exit_during_initialization("EXTSHM is ON. Please remove EXTSHM from your environment.");
|
||||
}
|
||||
} else {
|
||||
_extshm = 0;
|
||||
vm_exit_during_initialization("EXTSHM is ON. Please remove EXTSHM from your environment.");
|
||||
}
|
||||
|
||||
// SPEC1170 behaviour: will change the behaviour of a number of POSIX APIs.
|
||||
|
||||
@ -49,11 +49,6 @@ class os::Aix {
|
||||
// 1 - SPEC1170 requested (XPG_SUS_ENV is ON)
|
||||
static int _xpg_sus_mode;
|
||||
|
||||
// -1 = uninitialized,
|
||||
// 0 - EXTSHM=OFF or not set
|
||||
// 1 - EXTSHM=ON
|
||||
static int _extshm;
|
||||
|
||||
static bool available_memory(physical_memory_size_type& value);
|
||||
static bool free_memory(physical_memory_size_type& value);
|
||||
static physical_memory_size_type physical_memory() { return _physical_memory; }
|
||||
@ -111,12 +106,6 @@ class os::Aix {
|
||||
return _xpg_sus_mode;
|
||||
}
|
||||
|
||||
// Returns true if EXTSHM=ON.
|
||||
static bool extshm() {
|
||||
assert(_extshm != -1, "not initialized");
|
||||
return _extshm;
|
||||
}
|
||||
|
||||
// result struct for get_meminfo()
|
||||
struct meminfo_t {
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -81,27 +81,37 @@ bool OSXSemaphore::timedwait(int64_t millis) {
|
||||
|
||||
// kernel semaphores take a relative timeout
|
||||
mach_timespec_t waitspec;
|
||||
int secs = millis / MILLIUNITS;
|
||||
int nsecs = millis_to_nanos(millis % MILLIUNITS);
|
||||
waitspec.tv_sec = secs;
|
||||
waitspec.tv_nsec = nsecs;
|
||||
int64_t starttime;
|
||||
const bool is_trywait = millis == 0;
|
||||
|
||||
int64_t starttime = os::javaTimeNanos();
|
||||
if (!is_trywait) {
|
||||
int secs = millis / MILLIUNITS;
|
||||
int nsecs = millis_to_nanos(millis % MILLIUNITS);
|
||||
waitspec.tv_sec = secs;
|
||||
waitspec.tv_nsec = nsecs;
|
||||
|
||||
starttime = os::javaTimeNanos();
|
||||
} else {
|
||||
waitspec.tv_sec = 0;
|
||||
waitspec.tv_nsec = 0;
|
||||
}
|
||||
|
||||
kr = semaphore_timedwait(_semaphore, waitspec);
|
||||
while (kr == KERN_ABORTED) {
|
||||
// reduce the timeout and try again
|
||||
int64_t totalwait = millis_to_nanos(millis);
|
||||
int64_t current = os::javaTimeNanos();
|
||||
int64_t passedtime = current - starttime;
|
||||
if (!is_trywait) {
|
||||
// reduce the timeout and try again
|
||||
int64_t totalwait = millis_to_nanos(millis);
|
||||
int64_t current = os::javaTimeNanos();
|
||||
int64_t passedtime = current - starttime;
|
||||
|
||||
if (passedtime >= totalwait) {
|
||||
waitspec.tv_sec = 0;
|
||||
waitspec.tv_nsec = 0;
|
||||
} else {
|
||||
int64_t waittime = totalwait - (current - starttime);
|
||||
waitspec.tv_sec = waittime / NANOSECS_PER_SEC;
|
||||
waitspec.tv_nsec = waittime % NANOSECS_PER_SEC;
|
||||
if (passedtime >= totalwait) {
|
||||
waitspec.tv_sec = 0;
|
||||
waitspec.tv_nsec = 0;
|
||||
} else {
|
||||
int64_t waittime = totalwait - (current - starttime);
|
||||
waitspec.tv_sec = waittime / NANOSECS_PER_SEC;
|
||||
waitspec.tv_nsec = waittime % NANOSECS_PER_SEC;
|
||||
}
|
||||
}
|
||||
|
||||
kr = semaphore_timedwait(_semaphore, waitspec);
|
||||
|
||||
36
src/hotspot/os_cpu/aix_ppc/vm_version_aix_ppc.cpp
Normal file
36
src/hotspot/os_cpu/aix_ppc/vm_version_aix_ppc.cpp
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/vm_version.hpp"
|
||||
|
||||
#include <sys/systemcfg.h>
|
||||
|
||||
// Returns the data cache line size as recorded in the `dcache_line` field
// of `_system_configuration`. No validation or fallback is applied here.
// NOTE(review): `_system_configuration` is presumably the AIX system
// configuration structure made available by <sys/systemcfg.h> — confirm.
int VM_Version::get_dcache_line_size() {
|
||||
return _system_configuration.dcache_line;
|
||||
}
|
||||
|
||||
// Returns the instruction cache line size as recorded in the `icache_line`
// field of `_system_configuration`. No validation or fallback is applied here.
// NOTE(review): `_system_configuration` is presumably the AIX system
// configuration structure made available by <sys/systemcfg.h> — confirm.
int VM_Version::get_icache_line_size() {
|
||||
return _system_configuration.icache_line;
|
||||
}
|
||||
44
src/hotspot/os_cpu/linux_ppc/vm_version_linux_ppc.cpp
Normal file
44
src/hotspot/os_cpu/linux_ppc/vm_version_linux_ppc.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2026 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/vm_version.hpp"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
// Returns the L1 data cache line size as reported by
// sysconf(_SC_LEVEL1_DCACHE_LINESIZE), or DEFAULT_CACHE_LINE_SIZE when
// sysconf() reports a non-positive value (error or "unknown").
int VM_Version::get_dcache_line_size() {
|
||||
// This should work on all modern linux versions:
|
||||
// Note: sysconf() returns long; the result is narrowed to int here.
int size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
|
||||
// It may fail with very old linux / glibc versions. We use DEFAULT_CACHE_LINE_SIZE in this case.
|
||||
// That is the correct value for all currently supported processors.
|
||||
// Any value <= 0 (failure or no information) selects the fallback.
return (size <= 0) ? DEFAULT_CACHE_LINE_SIZE : size;
|
||||
}
|
||||
|
||||
// Returns the L1 instruction cache line size as reported by
// sysconf(_SC_LEVEL1_ICACHE_LINESIZE), or DEFAULT_CACHE_LINE_SIZE when
// sysconf() reports a non-positive value (error or "unknown").
int VM_Version::get_icache_line_size() {
|
||||
// This should work on all modern linux versions:
|
||||
// Note: sysconf() returns long; the result is narrowed to int here.
int size = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
|
||||
// It may fail with very old linux / glibc versions. We use DEFAULT_CACHE_LINE_SIZE in this case.
|
||||
// That is the correct value for all currently supported processors.
|
||||
// Any value <= 0 (failure or no information) selects the fallback.
return (size <= 0) ? DEFAULT_CACHE_LINE_SIZE : size;
|
||||
}
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -993,9 +993,6 @@ void ADLParser::frame_parse(void) {
|
||||
if (strcmp(token,"frame_pointer")==0) {
|
||||
frame_pointer_parse(frame, false);
|
||||
}
|
||||
if (strcmp(token,"interpreter_frame_pointer")==0) {
|
||||
interpreter_frame_pointer_parse(frame, false);
|
||||
}
|
||||
if (strcmp(token,"inline_cache_reg")==0) {
|
||||
inline_cache_parse(frame, false);
|
||||
}
|
||||
@ -1119,11 +1116,6 @@ void ADLParser::frame_pointer_parse(FrameForm *frame, bool native) {
|
||||
else { frame->_frame_pointer = frame_pointer; }
|
||||
}
|
||||
|
||||
//------------------------------interpreter_frame_pointer_parse----------------------------
|
||||
void ADLParser::interpreter_frame_pointer_parse(FrameForm *frame, bool native) {
|
||||
frame->_interpreter_frame_pointer_reg = parse_one_arg("interpreter frame pointer entry");
|
||||
}
|
||||
|
||||
//------------------------------inline_cache_parse-----------------------------
|
||||
void ADLParser::inline_cache_parse(FrameForm *frame, bool native) {
|
||||
frame->_inline_cache_reg = parse_one_arg("inline cache reg entry");
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -120,7 +120,6 @@ protected:
|
||||
// Parse the components of the frame section
|
||||
void sync_stack_slots_parse(FrameForm *frame);
|
||||
void frame_pointer_parse(FrameForm *frame, bool native);
|
||||
void interpreter_frame_pointer_parse(FrameForm *frame, bool native);
|
||||
void inline_cache_parse(FrameForm *frame, bool native);
|
||||
void interpreter_arg_ptr_parse(FrameForm *frame, bool native);
|
||||
void interpreter_method_parse(FrameForm *frame, bool native);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -476,7 +476,6 @@ void AllocClass::forms_do(FormClosure* f) {
|
||||
FrameForm::FrameForm() {
|
||||
_sync_stack_slots = nullptr;
|
||||
_inline_cache_reg = nullptr;
|
||||
_interpreter_frame_pointer_reg = nullptr;
|
||||
_cisc_spilling_operand_name = nullptr;
|
||||
_frame_pointer = nullptr;
|
||||
_c_frame_pointer = nullptr;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -347,7 +347,6 @@ public:
|
||||
// Public Data
|
||||
char *_sync_stack_slots;
|
||||
char *_inline_cache_reg;
|
||||
char *_interpreter_frame_pointer_reg;
|
||||
char *_cisc_spilling_operand_name;
|
||||
char *_frame_pointer;
|
||||
char *_c_frame_pointer;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -4212,14 +4212,6 @@ void ArchDesc::buildFrameMethods(FILE *fp_cpp) {
|
||||
fprintf(fp_cpp,"int Matcher::inline_cache_reg_encode() {");
|
||||
fprintf(fp_cpp," return _regEncode[inline_cache_reg()]; }\n\n");
|
||||
|
||||
// Interpreter's Frame Pointer Register
|
||||
fprintf(fp_cpp,"OptoReg::Name Matcher::interpreter_frame_pointer_reg() {");
|
||||
if (_frame->_interpreter_frame_pointer_reg == nullptr)
|
||||
fprintf(fp_cpp," return OptoReg::Bad; }\n\n");
|
||||
else
|
||||
fprintf(fp_cpp," return OptoReg::Name(%s_num); }\n\n",
|
||||
_frame->_interpreter_frame_pointer_reg);
|
||||
|
||||
// Frame Pointer definition
|
||||
/* CNC - I can not contemplate having a different frame pointer between
|
||||
Java and native code; makes my head hurt to think about it.
|
||||
|
||||
@ -1607,12 +1607,12 @@ void ClassVerifier::verify_method(const methodHandle& m, TRAPS) {
|
||||
case Bytecodes::_if_acmpeq :
|
||||
case Bytecodes::_if_acmpne :
|
||||
current_frame.pop_stack(
|
||||
VerificationType::reference_check(), CHECK_VERIFY(this));
|
||||
object_type(), CHECK_VERIFY(this));
|
||||
// fall through
|
||||
case Bytecodes::_ifnull :
|
||||
case Bytecodes::_ifnonnull :
|
||||
current_frame.pop_stack(
|
||||
VerificationType::reference_check(), CHECK_VERIFY(this));
|
||||
object_type(), CHECK_VERIFY(this));
|
||||
stackmap_table.check_jump_target
|
||||
(&current_frame, bcs.bci(), bcs.get_offset_s2(), CHECK_VERIFY(this));
|
||||
no_control_flow = false; break;
|
||||
|
||||
@ -447,9 +447,6 @@ void CompilerConfig::set_jvmci_specific_flags() {
|
||||
if (FLAG_IS_DEFAULT(InitialCodeCacheSize)) {
|
||||
FLAG_SET_DEFAULT(InitialCodeCacheSize, MAX2(16*M, InitialCodeCacheSize));
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(NewSizeThreadIncrease)) {
|
||||
FLAG_SET_DEFAULT(NewSizeThreadIncrease, MAX2(4*K, NewSizeThreadIncrease));
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(Tier3DelayOn)) {
|
||||
// This effectively prevents the compile broker scheduling tier 2
|
||||
// (i.e., limited C1 profiling) compilations instead of tier 3
|
||||
|
||||
@ -58,7 +58,6 @@ define_pd_global(bool, TieredCompilation, false);
|
||||
define_pd_global(intx, CompileThreshold, 0);
|
||||
|
||||
define_pd_global(intx, OnStackReplacePercentage, 0);
|
||||
define_pd_global(size_t, NewSizeThreadIncrease, 4*K);
|
||||
define_pd_global(bool, InlineClassNatives, true);
|
||||
define_pd_global(bool, InlineUnsafeOps, true);
|
||||
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
|
||||
|
||||
@ -73,8 +73,8 @@ void G1BlockOffsetTable::set_offset_array(Atomic<uint8_t>* left, Atomic<uint8_t>
|
||||
|
||||
#ifdef ASSERT
|
||||
void G1BlockOffsetTable::check_address(Atomic<uint8_t>* addr, const char* msg) const {
|
||||
Atomic<uint8_t>* start_addr = const_cast<Atomic<uint8_t>*>(_offset_base + (uintptr_t(_reserved.start()) >> CardTable::card_shift()));
|
||||
Atomic<uint8_t>* end_addr = const_cast<Atomic<uint8_t>*>(_offset_base + (uintptr_t(_reserved.end()) >> CardTable::card_shift()));
|
||||
Atomic<uint8_t>* start_addr = _offset_base + (uintptr_t(_reserved.start()) >> CardTable::card_shift());
|
||||
Atomic<uint8_t>* end_addr = _offset_base + (uintptr_t(_reserved.end()) >> CardTable::card_shift());
|
||||
assert(addr >= start_addr && addr <= end_addr,
|
||||
"%s - offset address: " PTR_FORMAT ", start address: " PTR_FORMAT ", end address: " PTR_FORMAT,
|
||||
msg, (p2i(addr)), (p2i(start_addr)), (p2i(end_addr)));
|
||||
|
||||
@ -54,7 +54,7 @@ uint8_t G1BlockOffsetTable::offset_array(Atomic<uint8_t>* addr) const {
|
||||
inline Atomic<uint8_t>* G1BlockOffsetTable::entry_for_addr(const void* const p) const {
|
||||
assert(_reserved.contains(p),
|
||||
"out of bounds access to block offset table");
|
||||
Atomic<uint8_t>* result = const_cast<Atomic<uint8_t>*>(&_offset_base[uintptr_t(p) >> CardTable::card_shift()]);
|
||||
Atomic<uint8_t>* result = &_offset_base[uintptr_t(p) >> CardTable::card_shift()];
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -1652,21 +1652,13 @@ jint G1CollectedHeap::initialize() {
|
||||
return JNI_OK;
|
||||
}
|
||||
|
||||
bool G1CollectedHeap::concurrent_mark_is_terminating() const {
|
||||
assert(_cm != nullptr, "_cm must have been created");
|
||||
assert(_cm->is_fully_initialized(), "thread must exist in order to check if mark is terminating");
|
||||
return _cm->cm_thread()->should_terminate();
|
||||
}
|
||||
|
||||
void G1CollectedHeap::stop() {
|
||||
// Stop all concurrent threads. We do this to make sure these threads
|
||||
// do not continue to execute and access resources (e.g. logging)
|
||||
// that are destroyed during shutdown.
|
||||
_cr->stop();
|
||||
_service_thread->stop();
|
||||
if (_cm->is_fully_initialized()) {
|
||||
_cm->cm_thread()->stop();
|
||||
}
|
||||
_cm->stop();
|
||||
}
|
||||
|
||||
void G1CollectedHeap::safepoint_synchronize_begin() {
|
||||
@ -1857,12 +1849,12 @@ void G1CollectedHeap::increment_old_marking_cycles_completed(bool concurrent,
|
||||
record_whole_heap_examined_timestamp();
|
||||
}
|
||||
|
||||
// We need to clear the "in_progress" flag in the CM thread before
|
||||
// We need to tell G1ConcurrentMark to update the state before
|
||||
// we wake up any waiters (especially when ExplicitInvokesConcurrent
|
||||
// is set) so that if a waiter requests another System.gc() it doesn't
|
||||
// incorrectly see that a marking cycle is still in progress.
|
||||
if (concurrent) {
|
||||
_cm->cm_thread()->set_idle();
|
||||
_cm->notify_concurrent_cycle_completed();
|
||||
}
|
||||
|
||||
// Notify threads waiting in System.gc() (with ExplicitGCInvokesConcurrent)
|
||||
@ -2565,11 +2557,9 @@ void G1CollectedHeap::start_concurrent_cycle(bool concurrent_operation_is_full_m
|
||||
assert(!_cm->in_progress(), "Can not start concurrent operation while in progress");
|
||||
MutexLocker x(G1CGC_lock, Mutex::_no_safepoint_check_flag);
|
||||
if (concurrent_operation_is_full_mark) {
|
||||
_cm->post_concurrent_mark_start();
|
||||
_cm->cm_thread()->start_full_mark();
|
||||
_cm->start_full_concurrent_cycle();
|
||||
} else {
|
||||
_cm->post_concurrent_undo_start();
|
||||
_cm->cm_thread()->start_undo_mark();
|
||||
_cm->start_undo_concurrent_cycle();
|
||||
}
|
||||
G1CGC_lock->notify();
|
||||
}
|
||||
|
||||
@ -915,9 +915,6 @@ public:
|
||||
// specified by the policy object.
|
||||
jint initialize() override;
|
||||
|
||||
// Returns whether concurrent mark threads (and the VM) are about to terminate.
|
||||
bool concurrent_mark_is_terminating() const;
|
||||
|
||||
void safepoint_synchronize_begin() override;
|
||||
void safepoint_synchronize_end() override;
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -174,7 +174,6 @@ void G1CollectionSet::iterate(G1HeapRegionClosure* cl) const {
|
||||
G1HeapRegion* r = _g1h->region_at(_regions[i]);
|
||||
bool result = cl->do_heap_region(r);
|
||||
if (result) {
|
||||
cl->set_incomplete();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -581,6 +581,11 @@ PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const
|
||||
return _partial_array_state_manager;
|
||||
}
|
||||
|
||||
// Accessor for the _cm_thread member.
// Asserts is_fully_initialized() before returning, so in assert-enabled
// builds calling this before that condition holds trips the assertion.
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
|
||||
assert(is_fully_initialized(), "must be");
|
||||
return _cm_thread;
|
||||
}
|
||||
|
||||
void G1ConcurrentMark::reset() {
|
||||
_has_aborted.store_relaxed(false);
|
||||
|
||||
@ -715,7 +720,6 @@ public:
|
||||
private:
|
||||
// Heap region closure used for clearing the _mark_bitmap.
|
||||
class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
|
||||
private:
|
||||
G1ConcurrentMark* _cm;
|
||||
G1CMBitMap* _bitmap;
|
||||
bool _suspendible; // If suspendible, do yield checks.
|
||||
@ -813,10 +817,6 @@ public:
|
||||
SuspendibleThreadSetJoiner sts_join(_suspendible);
|
||||
G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
|
||||
}
|
||||
|
||||
bool is_complete() {
|
||||
return _cl.is_complete();
|
||||
}
|
||||
};
|
||||
|
||||
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
|
||||
@ -831,7 +831,6 @@ void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
|
||||
|
||||
log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
|
||||
workers->run_task(&cl, num_workers);
|
||||
guarantee(may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
|
||||
}
|
||||
|
||||
void G1ConcurrentMark::cleanup_for_next_mark() {
|
||||
@ -898,9 +897,26 @@ public:
|
||||
};
|
||||
|
||||
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
|
||||
|
||||
class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
|
||||
G1ConcurrentMark* _cm;
|
||||
|
||||
public:
|
||||
NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
|
||||
_cm->update_top_at_mark_start(r);
|
||||
} else {
|
||||
_cm->reset_top_at_mark_start(r);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} _region_cl;
|
||||
|
||||
G1HeapRegionClaimer _claimer;
|
||||
public:
|
||||
NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }
|
||||
NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
|
||||
|
||||
double worker_cost() const override {
|
||||
// The work done per region is very small, therefore we choose this magic number to cap the number
|
||||
@ -909,8 +925,13 @@ public:
|
||||
return _claimer.n_regions() / regions_per_thread;
|
||||
}
|
||||
|
||||
void set_max_workers(uint max_workers) override;
|
||||
void do_work(uint worker_id) override;
|
||||
void set_max_workers(uint max_workers) override {
|
||||
_claimer.set_n_workers(max_workers);
|
||||
}
|
||||
|
||||
void do_work(uint worker_id) override {
|
||||
G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
|
||||
}
|
||||
};
|
||||
|
||||
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
|
||||
@ -918,31 +939,6 @@ void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
|
||||
_cm->reset();
|
||||
}
|
||||
|
||||
class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
|
||||
G1ConcurrentMark* _cm;
|
||||
|
||||
public:
|
||||
NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
|
||||
_cm->update_top_at_mark_start(r);
|
||||
} else {
|
||||
_cm->reset_top_at_mark_start(r);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
|
||||
NoteStartOfMarkHRClosure start_cl;
|
||||
G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id);
|
||||
}
|
||||
|
||||
void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
|
||||
_claimer.set_n_workers(max_workers);
|
||||
}
|
||||
|
||||
G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
|
||||
G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
|
||||
add_serial_task(new ResetMarkingStateTask(cm));
|
||||
@ -962,8 +958,7 @@ void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
|
||||
_gc_tracer_cm->set_gc_cause(cause);
|
||||
}
|
||||
|
||||
|
||||
void G1ConcurrentMark::post_concurrent_mark_start() {
|
||||
void G1ConcurrentMark::start_full_concurrent_cycle() {
|
||||
// Start Concurrent Marking weak-reference discovery.
|
||||
ReferenceProcessor* rp = _g1h->ref_processor_cm();
|
||||
rp->start_discovery(false /* always_clear */);
|
||||
@ -980,10 +975,26 @@ void G1ConcurrentMark::post_concurrent_mark_start() {
|
||||
// when marking is on. So, it's also called at the end of the
|
||||
// concurrent start pause to update the heap end, if the heap expands
|
||||
// during it. No need to call it here.
|
||||
|
||||
// Signal the thread to start work.
|
||||
cm_thread()->start_full_mark();
|
||||
}
|
||||
|
||||
void G1ConcurrentMark::post_concurrent_undo_start() {
|
||||
void G1ConcurrentMark::start_undo_concurrent_cycle() {
|
||||
root_regions()->cancel_scan();
|
||||
|
||||
// Signal the thread to start work.
|
||||
cm_thread()->start_undo_mark();
|
||||
}
|
||||
|
||||
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
|
||||
cm_thread()->set_idle();
|
||||
}
|
||||
|
||||
void G1ConcurrentMark::stop() {
|
||||
if (is_fully_initialized()) {
|
||||
cm_thread()->stop();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1947,7 +1958,7 @@ bool G1ConcurrentMark::concurrent_cycle_abort() {
|
||||
// has been signalled is already rare), and this work should be negligible compared
|
||||
// to actual full gc work.
|
||||
|
||||
if (!is_fully_initialized() || (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating())) {
|
||||
if (!is_fully_initialized() || (!cm_thread()->in_progress() && !cm_thread()->should_terminate())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -352,6 +352,7 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
|
||||
friend class G1CMRemarkTask;
|
||||
friend class G1CMRootRegionScanTask;
|
||||
friend class G1CMTask;
|
||||
friend class G1ClearBitMapTask;
|
||||
friend class G1ConcurrentMarkThread;
|
||||
|
||||
G1ConcurrentMarkThread* _cm_thread; // The thread doing the work
|
||||
@ -524,6 +525,9 @@ class G1ConcurrentMark : public CHeapObj<mtGC> {
|
||||
Atomic<HeapWord*>* _top_at_rebuild_starts;
|
||||
// True when Remark pause selected regions for rebuilding.
|
||||
bool _needs_remembered_set_rebuild;
|
||||
|
||||
G1ConcurrentMarkThread* cm_thread() const;
|
||||
|
||||
public:
|
||||
// To be called when an object is marked the first time, e.g. after a successful
|
||||
// mark_in_bitmap call. Updates various statistics data.
|
||||
@ -602,8 +606,6 @@ public:
|
||||
G1RegionToSpaceMapper* bitmap_storage);
|
||||
~G1ConcurrentMark();
|
||||
|
||||
G1ConcurrentMarkThread* cm_thread() { return _cm_thread; }
|
||||
|
||||
G1CMBitMap* mark_bitmap() const { return (G1CMBitMap*)&_mark_bitmap; }
|
||||
|
||||
// Calculates the number of concurrent GC threads to be used in the marking phase.
|
||||
@ -632,8 +634,15 @@ public:
|
||||
// These two methods do the work that needs to be done at the start and end of the
|
||||
// concurrent start pause.
|
||||
void pre_concurrent_start(GCCause::Cause cause);
|
||||
void post_concurrent_mark_start();
|
||||
void post_concurrent_undo_start();
|
||||
|
||||
// Start the particular type of concurrent cycle. After this call threads may be running.
|
||||
void start_full_concurrent_cycle();
|
||||
void start_undo_concurrent_cycle();
|
||||
|
||||
void notify_concurrent_cycle_completed();
|
||||
|
||||
// Stop active components/the concurrent mark thread.
|
||||
void stop();
|
||||
|
||||
// Scan all the root regions and mark everything reachable from
|
||||
// them.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,7 +32,7 @@
|
||||
|
||||
// Total virtual time so far.
|
||||
inline double G1ConcurrentMarkThread::total_mark_cpu_time_s() {
|
||||
return os::thread_cpu_time(this) + worker_threads_cpu_time_s();
|
||||
return static_cast<double>(os::thread_cpu_time(this)) + worker_threads_cpu_time_s();
|
||||
}
|
||||
|
||||
// Marking virtual time so far
|
||||
|
||||
@ -567,41 +567,15 @@ public:
|
||||
// G1HeapRegionClosure is used for iterating over regions.
|
||||
// Terminates the iteration when the "do_heap_region" method returns "true".
|
||||
class G1HeapRegionClosure : public StackObj {
|
||||
friend class G1HeapRegionManager;
|
||||
friend class G1CollectionSet;
|
||||
friend class G1CollectionSetCandidates;
|
||||
|
||||
bool _is_complete;
|
||||
void set_incomplete() { _is_complete = false; }
|
||||
|
||||
public:
|
||||
G1HeapRegionClosure(): _is_complete(true) {}
|
||||
|
||||
// Typically called on each region until it returns true.
|
||||
virtual bool do_heap_region(G1HeapRegion* r) = 0;
|
||||
|
||||
// True after iteration if the closure was applied to all heap regions
|
||||
// and returned "false" in all cases.
|
||||
bool is_complete() { return _is_complete; }
|
||||
};
|
||||
|
||||
class G1HeapRegionIndexClosure : public StackObj {
|
||||
friend class G1HeapRegionManager;
|
||||
friend class G1CollectionSet;
|
||||
friend class G1CollectionSetCandidates;
|
||||
|
||||
bool _is_complete;
|
||||
void set_incomplete() { _is_complete = false; }
|
||||
|
||||
public:
|
||||
G1HeapRegionIndexClosure(): _is_complete(true) {}
|
||||
|
||||
// Typically called on each region until it returns true.
|
||||
virtual bool do_heap_region_index(uint region_index) = 0;
|
||||
|
||||
// True after iteration if the closure was applied to all heap regions
|
||||
// and returned "false" in all cases.
|
||||
bool is_complete() { return _is_complete; }
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1HEAPREGION_HPP
|
||||
|
||||
@ -42,6 +42,11 @@
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
inline HeapWord* G1HeapRegion::block_start(const void* addr) const {
|
||||
if (is_young()) {
|
||||
// We are here because of BlockLocationPrinter.
|
||||
// Can be invoked in any context, so this region might not be parsable.
|
||||
return nullptr;
|
||||
}
|
||||
return block_start(addr, parsable_bottom_acquire());
|
||||
}
|
||||
|
||||
@ -64,6 +69,7 @@ inline HeapWord* G1HeapRegion::advance_to_block_containing_addr(const void* addr
|
||||
|
||||
inline HeapWord* G1HeapRegion::block_start(const void* addr, HeapWord* const pb) const {
|
||||
assert(addr >= bottom() && addr < top(), "invalid address");
|
||||
assert(!is_young(), "Only non-young regions have BOT");
|
||||
HeapWord* first_block = _bot->block_start_reaching_into_card(addr);
|
||||
return advance_to_block_containing_addr(addr, pb, first_block);
|
||||
}
|
||||
|
||||
@ -511,7 +511,6 @@ void G1HeapRegionManager::iterate(G1HeapRegionClosure* blk) const {
|
||||
guarantee(at(i) != nullptr, "Tried to access region %u that has a null G1HeapRegion*", i);
|
||||
bool res = blk->do_heap_region(at(i));
|
||||
if (res) {
|
||||
blk->set_incomplete();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -526,7 +525,6 @@ void G1HeapRegionManager::iterate(G1HeapRegionIndexClosure* blk) const {
|
||||
}
|
||||
bool res = blk->do_heap_region_index(i);
|
||||
if (res) {
|
||||
blk->set_incomplete();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -366,6 +366,12 @@ static size_t target_heap_capacity(size_t used_bytes, uintx free_ratio) {
|
||||
}
|
||||
|
||||
size_t G1HeapSizingPolicy::full_collection_resize_amount(bool& expand, size_t allocation_word_size) {
|
||||
// User-requested Full GCs introduce GC load unrelated to heap size; reset CPU
|
||||
// usage tracking so heap resizing heuristics are driven only by GC pressure.
|
||||
if (GCCause::is_user_requested_gc(_g1h->gc_cause())) {
|
||||
reset_cpu_usage_tracking_data();
|
||||
}
|
||||
|
||||
const size_t capacity_after_gc = _g1h->capacity();
|
||||
// Capacity, free and used after the GC counted as full regions to
|
||||
// include the waste in the following calculations.
|
||||
|
||||
@ -95,7 +95,7 @@ private:
|
||||
// Evict a given element of the statistics cache.
|
||||
void evict(uint idx);
|
||||
|
||||
size_t _num_cache_entries_mask;
|
||||
const uint _num_cache_entries_mask;
|
||||
|
||||
uint hash(uint idx) {
|
||||
return idx & _num_cache_entries_mask;
|
||||
|
||||
@ -1,83 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/serial/cSpaceCounters.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
|
||||
CSpaceCounters::CSpaceCounters(const char* name, int ordinal, size_t max_size,
|
||||
ContiguousSpace* s, GenerationCounters* gc)
|
||||
: _space(s) {
|
||||
if (UsePerfData) {
|
||||
EXCEPTION_MARK;
|
||||
ResourceMark rm;
|
||||
|
||||
const char* cns = PerfDataManager::name_space(gc->name_space(), "space",
|
||||
ordinal);
|
||||
|
||||
_name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC);
|
||||
strcpy(_name_space, cns);
|
||||
|
||||
const char* cname = PerfDataManager::counter_name(_name_space, "name");
|
||||
PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK);
|
||||
|
||||
cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
|
||||
_max_capacity = PerfDataManager::create_variable(SUN_GC, cname,
|
||||
PerfData::U_Bytes,
|
||||
(jlong)max_size,
|
||||
CHECK);
|
||||
|
||||
cname = PerfDataManager::counter_name(_name_space, "capacity");
|
||||
_capacity = PerfDataManager::create_variable(SUN_GC, cname,
|
||||
PerfData::U_Bytes,
|
||||
_space->capacity(),
|
||||
CHECK);
|
||||
|
||||
cname = PerfDataManager::counter_name(_name_space, "used");
|
||||
_used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes,
|
||||
_space->used(),
|
||||
CHECK);
|
||||
|
||||
cname = PerfDataManager::counter_name(_name_space, "initCapacity");
|
||||
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
|
||||
_space->capacity(), CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
CSpaceCounters::~CSpaceCounters() {
|
||||
FREE_C_HEAP_ARRAY(char, _name_space);
|
||||
}
|
||||
|
||||
void CSpaceCounters::update_capacity() {
|
||||
_capacity->set_value(_space->capacity());
|
||||
}
|
||||
|
||||
void CSpaceCounters::update_used() {
|
||||
_used->set_value(_space->used());
|
||||
}
|
||||
|
||||
void CSpaceCounters::update_all() {
|
||||
update_used();
|
||||
update_capacity();
|
||||
}
|
||||
@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SERIAL_CSPACECOUNTERS_HPP
|
||||
#define SHARE_GC_SERIAL_CSPACECOUNTERS_HPP
|
||||
|
||||
#include "gc/shared/generationCounters.hpp"
|
||||
#include "gc/shared/space.hpp"
|
||||
#include "runtime/perfData.hpp"
|
||||
|
||||
// A CSpaceCounters is a holder class for performance counters
|
||||
// that track a space.
|
||||
|
||||
class CSpaceCounters: public CHeapObj<mtGC> {
|
||||
private:
|
||||
PerfVariable* _capacity;
|
||||
PerfVariable* _used;
|
||||
PerfVariable* _max_capacity;
|
||||
|
||||
// Constant PerfData types don't need to retain a reference.
|
||||
// However, it's a good idea to document them here.
|
||||
// PerfConstant* _size;
|
||||
|
||||
ContiguousSpace* _space;
|
||||
char* _name_space;
|
||||
|
||||
public:
|
||||
|
||||
CSpaceCounters(const char* name, int ordinal, size_t max_size,
|
||||
ContiguousSpace* s, GenerationCounters* gc);
|
||||
|
||||
~CSpaceCounters();
|
||||
|
||||
void update_capacity();
|
||||
void update_used();
|
||||
void update_all();
|
||||
|
||||
const char* name_space() const { return _name_space; }
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SERIAL_CSPACECOUNTERS_HPP
|
||||
@ -39,6 +39,7 @@
|
||||
#include "gc/shared/gcTimer.hpp"
|
||||
#include "gc/shared/gcTrace.hpp"
|
||||
#include "gc/shared/gcTraceTime.inline.hpp"
|
||||
#include "gc/shared/hSpaceCounters.hpp"
|
||||
#include "gc/shared/oopStorageSet.inline.hpp"
|
||||
#include "gc/shared/referencePolicy.hpp"
|
||||
#include "gc/shared/referenceProcessorPhaseTimes.hpp"
|
||||
@ -248,12 +249,12 @@ DefNewGeneration::DefNewGeneration(ReservedSpace rs,
|
||||
min_size, max_size, _virtual_space.committed_size());
|
||||
_gc_counters = new CollectorCounters(policy, 0);
|
||||
|
||||
_eden_counters = new CSpaceCounters("eden", 0, _max_eden_size, _eden_space,
|
||||
_gen_counters);
|
||||
_from_counters = new CSpaceCounters("s0", 1, _max_survivor_size, _from_space,
|
||||
_gen_counters);
|
||||
_to_counters = new CSpaceCounters("s1", 2, _max_survivor_size, _to_space,
|
||||
_gen_counters);
|
||||
_eden_counters = new HSpaceCounters(_gen_counters->name_space(), "eden", 0,
|
||||
_max_eden_size, _eden_space->capacity());
|
||||
_from_counters = new HSpaceCounters(_gen_counters->name_space(), "s0", 1,
|
||||
_max_survivor_size, _from_space->capacity());
|
||||
_to_counters = new HSpaceCounters(_gen_counters->name_space(), "s1", 2,
|
||||
_max_survivor_size, _to_space->capacity());
|
||||
|
||||
update_counters();
|
||||
_old_gen = nullptr;
|
||||
@ -319,7 +320,7 @@ void DefNewGeneration::swap_spaces() {
|
||||
_to_space = s;
|
||||
|
||||
if (UsePerfData) {
|
||||
CSpaceCounters* c = _from_counters;
|
||||
HSpaceCounters* c = _from_counters;
|
||||
_from_counters = _to_counters;
|
||||
_to_counters = c;
|
||||
}
|
||||
@ -348,38 +349,6 @@ void DefNewGeneration::expand_eden_by(size_t delta_bytes) {
|
||||
post_resize();
|
||||
}
|
||||
|
||||
size_t DefNewGeneration::calculate_thread_increase_size(int threads_count) const {
|
||||
size_t thread_increase_size = 0;
|
||||
// Check an overflow at 'threads_count * NewSizeThreadIncrease'.
|
||||
if (threads_count > 0 && NewSizeThreadIncrease <= max_uintx / threads_count) {
|
||||
thread_increase_size = threads_count * NewSizeThreadIncrease;
|
||||
}
|
||||
return thread_increase_size;
|
||||
}
|
||||
|
||||
size_t DefNewGeneration::adjust_for_thread_increase(size_t new_size_candidate,
|
||||
size_t new_size_before,
|
||||
size_t alignment,
|
||||
size_t thread_increase_size) const {
|
||||
size_t desired_new_size = new_size_before;
|
||||
|
||||
if (NewSizeThreadIncrease > 0 && thread_increase_size > 0) {
|
||||
|
||||
// 1. Check an overflow at 'new_size_candidate + thread_increase_size'.
|
||||
if (new_size_candidate <= max_uintx - thread_increase_size) {
|
||||
new_size_candidate += thread_increase_size;
|
||||
|
||||
// 2. Check an overflow at 'align_up'.
|
||||
size_t aligned_max = ((max_uintx - alignment) & ~(alignment-1));
|
||||
if (new_size_candidate <= aligned_max) {
|
||||
desired_new_size = align_up(new_size_candidate, alignment);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return desired_new_size;
|
||||
}
|
||||
|
||||
size_t DefNewGeneration::calculate_desired_young_gen_bytes() const {
|
||||
size_t old_size = SerialHeap::heap()->old_gen()->capacity();
|
||||
size_t new_size_before = _virtual_space.committed_size();
|
||||
@ -391,14 +360,8 @@ size_t DefNewGeneration::calculate_desired_young_gen_bytes() const {
|
||||
// All space sizes must be multiples of Generation::GenGrain.
|
||||
size_t alignment = Generation::GenGrain;
|
||||
|
||||
int threads_count = Threads::number_of_non_daemon_threads();
|
||||
size_t thread_increase_size = calculate_thread_increase_size(threads_count);
|
||||
|
||||
size_t new_size_candidate = old_size / NewRatio;
|
||||
// Compute desired new generation size based on NewRatio and NewSizeThreadIncrease
|
||||
// and reverts to previous value if any overflow happens
|
||||
size_t desired_new_size = adjust_for_thread_increase(new_size_candidate, new_size_before,
|
||||
alignment, thread_increase_size);
|
||||
size_t desired_new_size = align_up(new_size_candidate, alignment);
|
||||
|
||||
// Adjust new generation size
|
||||
desired_new_size = clamp(desired_new_size, min_new_size, max_new_size);
|
||||
@ -821,9 +784,9 @@ void DefNewGeneration::gc_epilogue() {
|
||||
|
||||
void DefNewGeneration::update_counters() {
|
||||
if (UsePerfData) {
|
||||
_eden_counters->update_all();
|
||||
_from_counters->update_all();
|
||||
_to_counters->update_all();
|
||||
_eden_counters->update_all(_eden_space->capacity(), _eden_space->used());
|
||||
_from_counters->update_all(_from_space->capacity(), _from_space->used());
|
||||
_to_counters->update_all(_to_space->capacity(), _to_space->used());
|
||||
_gen_counters->update_capacity(_virtual_space.committed_size());
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,7 +25,6 @@
|
||||
#ifndef SHARE_GC_SERIAL_DEFNEWGENERATION_HPP
|
||||
#define SHARE_GC_SERIAL_DEFNEWGENERATION_HPP
|
||||
|
||||
#include "gc/serial/cSpaceCounters.hpp"
|
||||
#include "gc/serial/generation.hpp"
|
||||
#include "gc/serial/tenuredGeneration.hpp"
|
||||
#include "gc/shared/ageTable.hpp"
|
||||
@ -38,7 +37,7 @@
|
||||
#include "utilities/stack.hpp"
|
||||
|
||||
class ContiguousSpace;
|
||||
class CSpaceCounters;
|
||||
class HSpaceCounters;
|
||||
class OldGenScanClosure;
|
||||
class YoungGenScanClosure;
|
||||
class DefNewTracer;
|
||||
@ -102,9 +101,9 @@ class DefNewGeneration: public Generation {
|
||||
|
||||
// Performance Counters
|
||||
GenerationCounters* _gen_counters;
|
||||
CSpaceCounters* _eden_counters;
|
||||
CSpaceCounters* _from_counters;
|
||||
CSpaceCounters* _to_counters;
|
||||
HSpaceCounters* _eden_counters;
|
||||
HSpaceCounters* _from_counters;
|
||||
HSpaceCounters* _to_counters;
|
||||
|
||||
// sizing information
|
||||
size_t _max_eden_size;
|
||||
@ -230,15 +229,6 @@ class DefNewGeneration: public Generation {
|
||||
// Initialize eden/from/to spaces.
|
||||
void init_spaces();
|
||||
|
||||
// Return adjusted new size for NewSizeThreadIncrease.
|
||||
// If any overflow happens, revert to previous new size.
|
||||
size_t adjust_for_thread_increase(size_t new_size_candidate,
|
||||
size_t new_size_before,
|
||||
size_t alignment,
|
||||
size_t thread_increase_size) const;
|
||||
|
||||
size_t calculate_thread_increase_size(int threads_count) const;
|
||||
|
||||
|
||||
// Scavenge support
|
||||
void swap_spaces();
|
||||
|
||||
@ -32,6 +32,7 @@
|
||||
#include "gc/shared/gcTimer.hpp"
|
||||
#include "gc/shared/gcTrace.hpp"
|
||||
#include "gc/shared/genArguments.hpp"
|
||||
#include "gc/shared/hSpaceCounters.hpp"
|
||||
#include "gc/shared/space.hpp"
|
||||
#include "gc/shared/spaceDecorator.hpp"
|
||||
#include "logging/log.hpp"
|
||||
@ -330,9 +331,9 @@ TenuredGeneration::TenuredGeneration(ReservedSpace rs,
|
||||
|
||||
_gc_counters = new CollectorCounters("Serial full collection pauses", 1);
|
||||
|
||||
_space_counters = new CSpaceCounters(gen_name, 0,
|
||||
_space_counters = new HSpaceCounters(_gen_counters->name_space(), gen_name, 0,
|
||||
_virtual_space.reserved_size(),
|
||||
_the_space, _gen_counters);
|
||||
_the_space->capacity());
|
||||
}
|
||||
|
||||
void TenuredGeneration::gc_prologue() {
|
||||
@ -367,7 +368,7 @@ void TenuredGeneration::update_promote_stats() {
|
||||
|
||||
void TenuredGeneration::update_counters() {
|
||||
if (UsePerfData) {
|
||||
_space_counters->update_all();
|
||||
_space_counters->update_all(_the_space->capacity(), _the_space->used());
|
||||
_gen_counters->update_capacity(_virtual_space.committed_size());
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,7 +25,6 @@
|
||||
#ifndef SHARE_GC_SERIAL_TENUREDGENERATION_HPP
|
||||
#define SHARE_GC_SERIAL_TENUREDGENERATION_HPP
|
||||
|
||||
#include "gc/serial/cSpaceCounters.hpp"
|
||||
#include "gc/serial/generation.hpp"
|
||||
#include "gc/serial/serialBlockOffsetTable.hpp"
|
||||
#include "gc/shared/generationCounters.hpp"
|
||||
@ -34,6 +33,7 @@
|
||||
|
||||
class CardTableRS;
|
||||
class ContiguousSpace;
|
||||
class HSpaceCounters;
|
||||
|
||||
// TenuredGeneration models the heap containing old (promoted/tenured) objects
|
||||
// contained in a single contiguous space. This generation is covered by a card
|
||||
@ -68,7 +68,7 @@ class TenuredGeneration: public Generation {
|
||||
ContiguousSpace* _the_space; // Actual space holding objects
|
||||
|
||||
GenerationCounters* _gen_counters;
|
||||
CSpaceCounters* _space_counters;
|
||||
HSpaceCounters* _space_counters;
|
||||
|
||||
// Avg amount promoted; used for avoiding promotion undo
|
||||
// This class does not update deviations if the sample is zero.
|
||||
|
||||
@ -289,7 +289,7 @@ protected:
|
||||
DEBUG_ONLY(bool is_in_or_null(const void* p) const { return p == nullptr || is_in(p); })
|
||||
|
||||
void set_gc_cause(GCCause::Cause v);
|
||||
GCCause::Cause gc_cause() { return _gc_cause; }
|
||||
GCCause::Cause gc_cause() const { return _gc_cause; }
|
||||
|
||||
oop obj_allocate(Klass* klass, size_t size, TRAPS);
|
||||
virtual oop array_allocate(Klass* klass, size_t size, int length, bool do_zero, TRAPS);
|
||||
|
||||
@ -480,11 +480,6 @@
|
||||
"Ratio of old/new generation sizes") \
|
||||
range(0, max_uintx-1) \
|
||||
\
|
||||
product_pd(size_t, NewSizeThreadIncrease, \
|
||||
"Additional size added to desired new generation size per " \
|
||||
"non-daemon thread (in bytes)") \
|
||||
range(0, max_uintx) \
|
||||
\
|
||||
product(uintx, QueuedAllocationWarningCount, 0, \
|
||||
"Number of times an allocation that queues behind a GC " \
|
||||
"will retry before printing a warning") \
|
||||
|
||||
@ -147,14 +147,14 @@ public:
|
||||
|
||||
// PLAB book-keeping.
|
||||
class PLABStats : public CHeapObj<mtGC> {
|
||||
protected:
|
||||
const char* _description; // Identifying string.
|
||||
|
||||
Atomic<size_t> _allocated; // Total allocated
|
||||
Atomic<size_t> _wasted; // of which wasted (internal fragmentation)
|
||||
Atomic<size_t> _undo_wasted; // of which wasted on undo (is not used for calculation of PLAB size)
|
||||
Atomic<size_t> _unused; // Unused in last buffer
|
||||
|
||||
protected:
|
||||
const char* _description; // Identifying string.
|
||||
|
||||
virtual void reset() {
|
||||
_allocated.store_relaxed(0);
|
||||
_wasted.store_relaxed(0);
|
||||
@ -164,11 +164,11 @@ protected:
|
||||
|
||||
public:
|
||||
PLABStats(const char* description) :
|
||||
_description(description),
|
||||
_allocated(0),
|
||||
_wasted(0),
|
||||
_undo_wasted(0),
|
||||
_unused(0)
|
||||
_unused(0),
|
||||
_description(description)
|
||||
{ }
|
||||
|
||||
virtual ~PLABStats() { }
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
|
||||
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
|
||||
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
|
||||
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "logging/logTag.hpp"
|
||||
#include "runtime/globals.hpp"
|
||||
@ -59,14 +60,95 @@ const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0
|
||||
const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
|
||||
const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
|
||||
|
||||
|
||||
// To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization,
|
||||
// for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history
|
||||
// allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor.
|
||||
const size_t ShenandoahAdaptiveHeuristics::GC_TIME_SAMPLE_SIZE = 3;
|
||||
|
||||
// We also keep separate track of recently sampled allocation rates for two purposes:
|
||||
// 1. The number of samples examined to determine acceleration of allocation is represented by
|
||||
// ShenandoahRateAccelerationSampleSize
|
||||
// 2. The number of most recent samples averaged to determine a momentary allocation spike is represented by
|
||||
// ShenandoahMomentaryAllocationRateSpikeSampleSize
|
||||
|
||||
// Allocation rates are sampled by the regulator thread, which typically runs every ms. There may be jitter in the scheduling
|
||||
// of the regulator thread. To reduce signal noise and synchronization overhead, we do not sample allocation rate with every
|
||||
// iteration of the regulator. We prefer sample time longer than 1 ms so that there can be a statistically significant number
|
||||
// of allocations occurring within each sample period. The regulator thread samples allocation rate only if at least
|
||||
// ShenandoahAccelerationSamplePeriod ms have passed since it previously sampled the allocation rate.
|
||||
//
|
||||
// This trigger responds much more quickly than the traditional trigger, which monitors 100 ms spans. When acceleration is
|
||||
// detected, the effect of acceleration on the anticipated consumption of available memory is also much greater
|
||||
// than the effect of the assumed constant allocation rate.
|
||||
|
||||
// Constructs the adaptive heuristics for the given space.
//
// Besides forwarding space_info to ShenandoahHeuristics and seeding the two trigger parameters from
// ShenandoahAdaptiveInitialConfidence and ShenandoahAdaptiveInitialSpikeThreshold, the initializer list allocates the
// C-heap arrays that back two sample histories:
//  - four GC_TIME_SAMPLE_SIZE-entry arrays (timestamps, samples, x*y products, x*x products) used by add_gc_time()
//  - two _spike_acceleration_buffer_size-entry arrays (rate samples and their timestamps) used by
//    add_rate_to_acceleration_history()
// All six arrays are released by the destructor.
ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) :
|
||||
ShenandoahHeuristics(space_info),
|
||||
_margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
|
||||
_spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
|
||||
_last_trigger(OTHER),
|
||||
_available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { }
|
||||
_available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor),
|
||||
// The free set is not known yet; post_initialize() assigns it.
_free_set(nullptr),
|
||||
_previous_acceleration_sample_timestamp(0.0),
|
||||
// GC time history: an initially empty ring buffer plus the running sums and fitted line parameters derived from it.
_gc_time_first_sample_index(0),
|
||||
_gc_time_num_samples(0),
|
||||
_gc_time_timestamps(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
|
||||
_gc_time_samples(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
|
||||
_gc_time_xy(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
|
||||
_gc_time_xx(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
|
||||
_gc_time_sum_of_timestamps(0),
|
||||
_gc_time_sum_of_samples(0),
|
||||
_gc_time_sum_of_xy(0),
|
||||
_gc_time_sum_of_xx(0),
|
||||
_gc_time_m(0.0),
|
||||
_gc_time_b(0.0),
|
||||
_gc_time_sd(0.0),
|
||||
// One buffer serves both consumers of the rate history: it holds at least ShenandoahRateAccelerationSampleSize samples
// and at least ShenandoahMomentaryAllocationRateSpikeSampleSize samples plus one preceding sample.
_spike_acceleration_buffer_size(MAX2(ShenandoahRateAccelerationSampleSize, 1+ShenandoahMomentaryAllocationRateSpikeSampleSize)),
|
||||
_spike_acceleration_first_sample_index(0),
|
||||
_spike_acceleration_num_samples(0),
|
||||
// NOTE(review): the two allocations below read _spike_acceleration_buffer_size. Members are initialized in declaration
// order, so this relies on the size being declared before both arrays in the class - confirm in the header.
_spike_acceleration_rate_samples(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)),
|
||||
_spike_acceleration_rate_timestamps(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)) {
|
||||
}
|
||||
|
||||
ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
|
||||
// Releases the six C-heap arrays that the constructor allocated for the GC time history and for the allocation rate
// history.
ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {
  // Backing store of the GC time linear model
  FREE_C_HEAP_ARRAY(double, _gc_time_xx);
  FREE_C_HEAP_ARRAY(double, _gc_time_xy);
  FREE_C_HEAP_ARRAY(double, _gc_time_samples);
  FREE_C_HEAP_ARRAY(double, _gc_time_timestamps);

  // Backing store of the allocation rate history
  FREE_C_HEAP_ARRAY(double, _spike_acceleration_rate_timestamps);
  FREE_C_HEAP_ARRAY(double, _spike_acceleration_rate_samples);
}
|
||||
|
||||
// Initialization hook. There is no adaptive-specific work to do here; the call is passed straight through to
// ShenandoahHeuristics::initialize().
void ShenandoahAdaptiveHeuristics::initialize() {
  ShenandoahHeuristics::initialize();
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::post_initialize() {
|
||||
ShenandoahHeuristics::post_initialize();
|
||||
_free_set = ShenandoahHeap::heap()->free_set();
|
||||
assert(!ShenandoahHeap::heap()->mode()->is_generational(), "ShenandoahGenerationalHeuristics overrides this method");
|
||||
compute_headroom_adjustment();
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::compute_headroom_adjustment() {
|
||||
// The trigger threshold represents mutator available - "head room".
|
||||
// We plan for GC to finish before the amount of allocated memory exceeds trigger threshold. This is the same as saying we
|
||||
// intend to finish GC before the amount of available memory is less than the allocation headroom. Headroom is the planned
|
||||
// safety buffer to allow a small amount of additional allocation to take place in case we were overly optimistic in delaying
|
||||
// our trigger.
|
||||
size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
|
||||
size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
|
||||
size_t penalties = capacity / 100 * _gc_time_penalties;
|
||||
_headroom_adjustment = spike_headroom + penalties;
|
||||
}
|
||||
|
||||
// Called at the start of an idle span: refreshes _headroom_adjustment so that it reflects the current soft max capacity
// and the current _gc_time_penalties.
void ShenandoahAdaptiveHeuristics::start_idle_span() {
  compute_headroom_adjustment();
}
|
||||
|
||||
// Applies the penalty adjustment `step` by delegating to ShenandoahHeuristics::adjust_penalty(). No adaptive-specific
// processing is layered on top.
void ShenandoahAdaptiveHeuristics::adjust_penalty(intx step) {
  ShenandoahHeuristics::adjust_penalty(step);
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
|
||||
RegionData* data, size_t size,
|
||||
@ -76,8 +158,8 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
|
||||
// The logic for cset selection in adaptive is as follows:
|
||||
//
|
||||
// 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
|
||||
// during evacuation, and thus guarantee full GC. In practice, we also want to let
|
||||
// application to allocate something. This is why we limit CSet to some fraction of
|
||||
// during evacuation, and thus guarantee full GC. In practice, we also want to let the
|
||||
// application allocate during concurrent GC. This is why we limit CSet to some fraction of
|
||||
// available space. In non-overloaded heap, max_cset would contain all plausible candidates
|
||||
// over garbage threshold.
|
||||
//
|
||||
@ -108,6 +190,7 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
|
||||
size_t cur_cset = 0;
|
||||
size_t cur_garbage = 0;
|
||||
|
||||
// Regions are sorted in order of decreasing garbage
|
||||
for (size_t idx = 0; idx < size; idx++) {
|
||||
ShenandoahHeapRegion* r = data[idx].get_region();
|
||||
|
||||
@ -126,6 +209,88 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand
|
||||
}
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::add_degenerated_gc_time(double timestamp, double gc_time) {
|
||||
// Conservatively add sample into linear model If this time is above the predicted concurrent gc time
|
||||
if (predict_gc_time(timestamp) < gc_time) {
|
||||
add_gc_time(timestamp, gc_time);
|
||||
}
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::add_gc_time(double timestamp, double gc_time) {
|
||||
// Update best-fit linear predictor of GC time
|
||||
uint index = (_gc_time_first_sample_index + _gc_time_num_samples) % GC_TIME_SAMPLE_SIZE;
|
||||
if (_gc_time_num_samples == GC_TIME_SAMPLE_SIZE) {
|
||||
_gc_time_sum_of_timestamps -= _gc_time_timestamps[index];
|
||||
_gc_time_sum_of_samples -= _gc_time_samples[index];
|
||||
_gc_time_sum_of_xy -= _gc_time_xy[index];
|
||||
_gc_time_sum_of_xx -= _gc_time_xx[index];
|
||||
}
|
||||
_gc_time_timestamps[index] = timestamp;
|
||||
_gc_time_samples[index] = gc_time;
|
||||
_gc_time_xy[index] = timestamp * gc_time;
|
||||
_gc_time_xx[index] = timestamp * timestamp;
|
||||
|
||||
_gc_time_sum_of_timestamps += _gc_time_timestamps[index];
|
||||
_gc_time_sum_of_samples += _gc_time_samples[index];
|
||||
_gc_time_sum_of_xy += _gc_time_xy[index];
|
||||
_gc_time_sum_of_xx += _gc_time_xx[index];
|
||||
|
||||
if (_gc_time_num_samples < GC_TIME_SAMPLE_SIZE) {
|
||||
_gc_time_num_samples++;
|
||||
} else {
|
||||
_gc_time_first_sample_index = (_gc_time_first_sample_index + 1) % GC_TIME_SAMPLE_SIZE;
|
||||
}
|
||||
|
||||
if (_gc_time_num_samples == 1) {
|
||||
// The predictor is constant (horizontal line)
|
||||
_gc_time_m = 0;
|
||||
_gc_time_b = gc_time;
|
||||
_gc_time_sd = 0.0;
|
||||
} else if (_gc_time_num_samples == 2) {
|
||||
// Two points define a line
|
||||
double delta_y = gc_time - _gc_time_samples[_gc_time_first_sample_index];
|
||||
double delta_x = timestamp - _gc_time_timestamps[_gc_time_first_sample_index];
|
||||
_gc_time_m = delta_y / delta_x;
|
||||
|
||||
// y = mx + b
|
||||
// so b = y0 - mx0
|
||||
_gc_time_b = gc_time - _gc_time_m * timestamp;
|
||||
_gc_time_sd = 0.0;
|
||||
} else {
|
||||
_gc_time_m = ((_gc_time_num_samples * _gc_time_sum_of_xy - _gc_time_sum_of_timestamps * _gc_time_sum_of_samples) /
|
||||
(_gc_time_num_samples * _gc_time_sum_of_xx - _gc_time_sum_of_timestamps * _gc_time_sum_of_timestamps));
|
||||
_gc_time_b = (_gc_time_sum_of_samples - _gc_time_m * _gc_time_sum_of_timestamps) / _gc_time_num_samples;
|
||||
double sum_of_squared_deviations = 0.0;
|
||||
for (size_t i = 0; i < _gc_time_num_samples; i++) {
|
||||
uint index = (_gc_time_first_sample_index + i) % GC_TIME_SAMPLE_SIZE;
|
||||
double x = _gc_time_timestamps[index];
|
||||
double predicted_y = _gc_time_m * x + _gc_time_b;
|
||||
double deviation = predicted_y - _gc_time_samples[index];
|
||||
sum_of_squared_deviations += deviation * deviation;
|
||||
}
|
||||
_gc_time_sd = sqrt(sum_of_squared_deviations / _gc_time_num_samples);
|
||||
}
|
||||
}
|
||||
|
||||
double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) {
|
||||
return _gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd;
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::add_rate_to_acceleration_history(double timestamp, double rate) {
|
||||
uint new_sample_index =
|
||||
(_spike_acceleration_first_sample_index + _spike_acceleration_num_samples) % _spike_acceleration_buffer_size;
|
||||
_spike_acceleration_rate_timestamps[new_sample_index] = timestamp;
|
||||
_spike_acceleration_rate_samples[new_sample_index] = rate;
|
||||
if (_spike_acceleration_num_samples == _spike_acceleration_buffer_size) {
|
||||
_spike_acceleration_first_sample_index++;
|
||||
if (_spike_acceleration_first_sample_index == _spike_acceleration_buffer_size) {
|
||||
_spike_acceleration_first_sample_index = 0;
|
||||
}
|
||||
} else {
|
||||
_spike_acceleration_num_samples++;
|
||||
}
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::record_cycle_start() {
|
||||
ShenandoahHeuristics::record_cycle_start();
|
||||
_allocation_rate.allocation_counter_reset();
|
||||
@ -133,6 +298,10 @@ void ShenandoahAdaptiveHeuristics::record_cycle_start() {
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
|
||||
ShenandoahHeuristics::record_success_concurrent();
|
||||
double now = os::elapsedTime();
|
||||
|
||||
// Should we not add GC time if this was an abbreviated cycle?
|
||||
add_gc_time(_cycle_start, elapsed_cycle_time());
|
||||
|
||||
size_t available = _space_info->available();
|
||||
|
||||
@ -185,6 +354,7 @@ void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::record_degenerated() {
|
||||
ShenandoahHeuristics::record_degenerated();
|
||||
add_degenerated_gc_time(_precursor_cycle_start, elapsed_degenerated_cycle_time());
|
||||
// Adjust both trigger's parameters in the case of a degenerated GC because
|
||||
// either of them should have triggered earlier to avoid this case.
|
||||
adjust_margin_of_error(DEGENERATE_PENALTY_SD);
|
||||
@ -236,6 +406,24 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() {
|
||||
size_t available = _space_info->soft_mutator_available();
|
||||
size_t allocated = _space_info->bytes_allocated_since_gc_start();
|
||||
|
||||
double avg_cycle_time = 0;
|
||||
double avg_alloc_rate = 0;
|
||||
double now = get_most_recent_wake_time();
|
||||
size_t allocatable_words = this->allocatable(available);
|
||||
double predicted_future_accelerated_gc_time = 0.0;
|
||||
size_t allocated_bytes_since_last_sample = 0;
|
||||
double instantaneous_rate_words_per_second = 0.0;
|
||||
size_t consumption_accelerated = 0;
|
||||
double acceleration = 0.0;
|
||||
double current_rate_by_acceleration = 0.0;
|
||||
size_t min_threshold = min_free_threshold();
|
||||
double predicted_future_gc_time = 0;
|
||||
double future_planned_gc_time = 0;
|
||||
bool future_planned_gc_time_is_average = false;
|
||||
double avg_time_to_deplete_available = 0.0;
|
||||
bool is_spiking = false;
|
||||
double spike_time_to_deplete_available = 0.0;
|
||||
|
||||
log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", "
|
||||
"allocated_since_gc_start: " PROPERFMT,
|
||||
PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated));
|
||||
@ -250,7 +438,6 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() {
|
||||
|
||||
_last_trigger = OTHER;
|
||||
|
||||
size_t min_threshold = min_free_threshold();
|
||||
if (available < min_threshold) {
|
||||
log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
|
||||
PROPERFMTARGS(available), PROPERFMTARGS(min_threshold));
|
||||
@ -271,55 +458,227 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Check if allocation headroom is still okay. This also factors in:
|
||||
// 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor)
|
||||
// 2. Accumulated penalties from Degenerated and Full GC
|
||||
size_t allocation_headroom = available;
|
||||
|
||||
size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
|
||||
size_t penalties = capacity / 100 * _gc_time_penalties;
|
||||
// The test (3 * allocated > available) below is intended to prevent triggers from firing so quickly that there
|
||||
// has not been sufficient time to create garbage that can be reclaimed during the triggered GC cycle. If we trigger before
|
||||
// garbage has been created, the concurrent GC will find no garbage. This has been observed to result in degens which
|
||||
// experience OOM during evac or that experience "bad progress", both of which escalate to Full GC. Note that garbage that
|
||||
// was allocated following the start of the current GC cycle cannot be reclaimed in this GC cycle. Here is the derivation
|
||||
// of the expression:
|
||||
//
|
||||
// Let R (runway) represent the total amount of memory that can be allocated following the start of GC(N). The runway
|
||||
// represents memory available at the start of the current GC plus garbage reclaimed by the current GC. In a balanced,
|
||||
// fully utilized configuration, we will be starting each new GC cycle immediately following completion of the preceding
|
||||
// GC cycle. In this configuration, we would expect half of R to be consumed during concurrent cycle GC(N) and half
|
||||
// to be consumed during concurrent GC(N+1).
|
||||
//
|
||||
// Assume we want to delay GC trigger until: A/V > 0.33
|
||||
// This is equivalent to enforcing that: A > 0.33V
|
||||
// which is: 3A > V
|
||||
// Since A+V equals R, we have: A + 3A > A + V = R
|
||||
// which is to say that: A > R/4
|
||||
//
|
||||
// Postponing the trigger until at least 1/4 of the runway has been consumed helps to improve the efficiency of the
|
||||
// triggered GC. Under heavy steady state workload, this delay condition generally has no effect: if the allocation
|
||||
// runway is divided "equally" between the current GC and the next GC, then at any potential trigger point (which cannot
|
||||
// happen any sooner than completion of the first GC), it is already the case that roughly A > R/2.
|
||||
if (3 * allocated <= available) {
|
||||
// Even though we will not issue an adaptive trigger unless a minimum threshold of memory has been allocated,
|
||||
// we still allow more generic triggers, such as guaranteed GC intervals, to act.
|
||||
return ShenandoahHeuristics::should_start_gc();
|
||||
}
|
||||
|
||||
allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
|
||||
allocation_headroom -= MIN2(allocation_headroom, penalties);
|
||||
avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
|
||||
avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
|
||||
if ((now - _previous_acceleration_sample_timestamp) >= (ShenandoahAccelerationSamplePeriod / 1000.0)) {
|
||||
predicted_future_accelerated_gc_time =
|
||||
predict_gc_time(now + MAX2(get_planned_sleep_interval(), ShenandoahAccelerationSamplePeriod / 1000.0));
|
||||
double future_accelerated_planned_gc_time;
|
||||
bool future_accelerated_planned_gc_time_is_average;
|
||||
if (predicted_future_accelerated_gc_time > avg_cycle_time) {
|
||||
future_accelerated_planned_gc_time = predicted_future_accelerated_gc_time;
|
||||
future_accelerated_planned_gc_time_is_average = false;
|
||||
} else {
|
||||
future_accelerated_planned_gc_time = avg_cycle_time;
|
||||
future_accelerated_planned_gc_time_is_average = true;
|
||||
}
|
||||
allocated_bytes_since_last_sample = _free_set->get_bytes_allocated_since_previous_sample();
|
||||
instantaneous_rate_words_per_second =
|
||||
(allocated_bytes_since_last_sample / HeapWordSize) / (now - _previous_acceleration_sample_timestamp);
|
||||
|
||||
double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
|
||||
double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
|
||||
_previous_acceleration_sample_timestamp = now;
|
||||
add_rate_to_acceleration_history(now, instantaneous_rate_words_per_second);
|
||||
current_rate_by_acceleration = instantaneous_rate_words_per_second;
|
||||
consumption_accelerated =
|
||||
accelerated_consumption(acceleration, current_rate_by_acceleration, avg_alloc_rate / HeapWordSize,
|
||||
(ShenandoahAccelerationSamplePeriod / 1000.0) + future_accelerated_planned_gc_time);
|
||||
|
||||
log_debug(gc)("average GC time: %.2f ms, allocation rate: %.0f %s/s",
|
||||
avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
|
||||
if (avg_cycle_time * avg_alloc_rate > allocation_headroom) {
|
||||
log_trigger("Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
|
||||
" to deplete free headroom (%zu%s) (margin of error = %.2f)",
|
||||
avg_cycle_time * 1000,
|
||||
byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
|
||||
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
|
||||
_margin_of_error_sd);
|
||||
log_info(gc, ergo)("Free headroom: %zu%s (free) - %zu%s (spike) - %zu%s (penalties) = %zu%s",
|
||||
byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
|
||||
byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
|
||||
byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
|
||||
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
|
||||
// Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation
|
||||
// rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later,
|
||||
// and then twice as big again after another short delay. When a phase change causes many threads to increase their
|
||||
// allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience
|
||||
// the phase change.
|
||||
//
|
||||
// The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before
|
||||
// introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times
|
||||
// 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is
|
||||
// still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates.
|
||||
//
|
||||
// Sample Time (s) Allocation Rate (MB/s) Headroom (GB)
|
||||
// 101.807 0.0 26.93
|
||||
// <--- accelerated spike can trigger here, around time 101.9s
|
||||
// 101.907 477.6 26.85
|
||||
// 102.007 3,206.0 26.35
|
||||
// 102.108 23,797.8 24.19
|
||||
// 102.208 24,164.5 21.83
|
||||
// 102.309 23,965.0 19.47
|
||||
// 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here
|
||||
//
|
||||
// Though the above measurements are from actual workload, the following details regarding sampled allocation rates at 3ms
|
||||
// period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible
|
||||
// result that correlates with the actual measurements.
|
||||
//
|
||||
// For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase
|
||||
// change that causes increasing allocations occurs near the end of this time segment. When sampled with a 3 ms period,
|
||||
// acceleration of allocation can be triggered at approximately time 101.88s.
|
||||
//
|
||||
// In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples.
|
||||
//
|
||||
// Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example, the
|
||||
// best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC,
|
||||
// especially given the abundance of Headroom at this moment in time.
|
||||
//
|
||||
// TimeStamp (s) Alloc rate (MB/s)
|
||||
// 101.857 0
|
||||
// 101.860 0
|
||||
// 101.863 0
|
||||
// 101.866 0
|
||||
// 101.869 53.3
|
||||
//
|
||||
// At the next sample time, we will compute a slightly higher acceleration, 9,150 MB/s/s. This is also insufficient to trigger
|
||||
// GC.
|
||||
//
|
||||
// TimeStamp (s) Alloc rate (MB/s)
|
||||
// 101.860 0
|
||||
// 101.863 0
|
||||
// 101.866 0
|
||||
// 101.869 53.3
|
||||
// 101.872 110.6
|
||||
//
|
||||
// Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will
|
||||
// trigger GC over 500 ms earlier than was previously possible.
|
||||
//
|
||||
// TimeStamp (s) Alloc rate (MB/s)
|
||||
// 101.866 0
|
||||
// 101.869 53.3
|
||||
// 101.872 110.6
|
||||
// 101.875 165.9
|
||||
// 101.878 221.2
|
||||
//
|
||||
// The accelerated rate heuristic is based on the following idea:
|
||||
//
|
||||
// Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent
|
||||
// sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC
|
||||
// cycle? If not, we should trigger right now.
|
||||
//
|
||||
// Outline of this heuristic triggering technique:
|
||||
//
|
||||
// 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate, r0, r1, and r2, sampled at t0, t1, and t2
|
||||
// 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2)
|
||||
// 3. Otherwise, use least squares method to compute best-fit line of rate vs time
|
||||
// 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate"
|
||||
// 5. Use r2 to represent CurrentRate
|
||||
// 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime
|
||||
// (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2)
|
||||
// 7. if Consumption exceeds headroom, trigger now
|
||||
//
|
||||
// Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes
|
||||
// are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and
|
||||
// a sample period of roughly 15 ms, spanning approximately 120 ms of execution.
|
||||
if (consumption_accelerated > allocatable_words) {
|
||||
size_t size_t_alloc_rate = (size_t) current_rate_by_acceleration * HeapWordSize;
|
||||
if (acceleration > 0) {
|
||||
size_t size_t_acceleration = (size_t) acceleration * HeapWordSize;
|
||||
log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
|
||||
"current rate (" PROPERFMT "/s) with acceleration (" PROPERFMT "/s/s) for planned %s GC time (%.2f ms)",
|
||||
PROPERFMTARGS(consumption_accelerated * HeapWordSize),
|
||||
PROPERFMTARGS(allocatable_words * HeapWordSize),
|
||||
PROPERFMTARGS(size_t_alloc_rate),
|
||||
PROPERFMTARGS(size_t_acceleration),
|
||||
future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
|
||||
future_accelerated_planned_gc_time * 1000);
|
||||
} else {
|
||||
log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at "
|
||||
"current rate (" PROPERFMT "/s) for planned %s GC time (%.2f ms) (spike threshold = %.2f)",
|
||||
PROPERFMTARGS(consumption_accelerated * HeapWordSize),
|
||||
PROPERFMTARGS(allocatable_words * HeapWordSize),
|
||||
PROPERFMTARGS(size_t_alloc_rate),
|
||||
future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
|
||||
future_accelerated_planned_gc_time * 1000, _spike_threshold_sd);
|
||||
|
||||
|
||||
}
|
||||
_spike_acceleration_num_samples = 0;
|
||||
_spike_acceleration_first_sample_index = 0;
|
||||
|
||||
// Count this as a form of RATE trigger for purposes of adjusting heuristic triggering configuration because this
|
||||
// trigger is influenced more by margin_of_error_sd than by spike_threshold_sd.
|
||||
accept_trigger_with_type(RATE);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Suppose we don't trigger now, but decide to trigger in the next regulator cycle. What will be the GC time then?
|
||||
predicted_future_gc_time = predict_gc_time(now + get_planned_sleep_interval());
|
||||
if (predicted_future_gc_time > avg_cycle_time) {
|
||||
future_planned_gc_time = predicted_future_gc_time;
|
||||
future_planned_gc_time_is_average = false;
|
||||
} else {
|
||||
future_planned_gc_time = avg_cycle_time;
|
||||
future_planned_gc_time_is_average = true;
|
||||
}
|
||||
|
||||
log_debug(gc)("%s: average GC time: %.2f ms, predicted GC time: %.2f ms, allocation rate: %.0f %s/s",
|
||||
_space_info->name(), avg_cycle_time * 1000, predicted_future_gc_time * 1000,
|
||||
byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
|
||||
size_t allocatable_bytes = allocatable_words * HeapWordSize;
|
||||
avg_time_to_deplete_available = allocatable_bytes / avg_alloc_rate;
|
||||
|
||||
if (future_planned_gc_time > avg_time_to_deplete_available) {
|
||||
log_trigger("%s GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
|
||||
" to deplete free headroom (%zu%s) (margin of error = %.2f)",
|
||||
future_planned_gc_time_is_average? "Average": "Linear prediction of", future_planned_gc_time * 1000,
|
||||
byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
|
||||
byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes),
|
||||
_margin_of_error_sd);
|
||||
|
||||
size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
|
||||
size_t penalties = capacity / 100 * _gc_time_penalties;
|
||||
size_t allocation_headroom = available;
|
||||
allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
|
||||
allocation_headroom -= MIN2(allocation_headroom, penalties);
|
||||
log_info(gc, ergo)("Free headroom: " PROPERFMT " (free) - " PROPERFMT "(spike) - " PROPERFMT " (penalties) = " PROPERFMT,
|
||||
PROPERFMTARGS(available),
|
||||
PROPERFMTARGS(spike_headroom),
|
||||
PROPERFMTARGS(penalties),
|
||||
PROPERFMTARGS(allocation_headroom));
|
||||
accept_trigger_with_type(RATE);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
|
||||
if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
|
||||
log_trigger("Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (%zu%s) (spike threshold = %.2f)",
|
||||
avg_cycle_time * 1000,
|
||||
byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
|
||||
byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
|
||||
_spike_threshold_sd);
|
||||
is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
|
||||
spike_time_to_deplete_available = (rate == 0)? 0: allocatable_bytes / rate;
|
||||
if (is_spiking && (rate != 0) && (future_planned_gc_time > spike_time_to_deplete_available)) {
|
||||
log_trigger("%s GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)"
|
||||
" to deplete free headroom (%zu%s) (spike threshold = %.2f)",
|
||||
future_planned_gc_time_is_average? "Average": "Linear prediction of", future_planned_gc_time * 1000,
|
||||
byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
|
||||
byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes),
|
||||
_spike_threshold_sd);
|
||||
accept_trigger_with_type(SPIKE);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ShenandoahHeuristics::should_start_gc()) {
|
||||
_start_gc_is_pending = true;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return ShenandoahHeuristics::should_start_gc();
|
||||
}
|
||||
|
||||
void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
|
||||
@ -352,6 +711,112 @@ size_t ShenandoahAdaptiveHeuristics::min_free_threshold() {
|
||||
return ShenandoahHeap::heap()->soft_max_capacity() / 100 * ShenandoahMinFreeThreshold;
|
||||
}
|
||||
|
||||
// This is called each time a new rate sample has been gathered, as governed by ShenandoahAccelerationSamplePeriod.
|
||||
// Unlike traditional calculation of average allocation rate, there is no adjustment for standard deviation of the
|
||||
// accelerated rate prediction.
|
||||
size_t ShenandoahAdaptiveHeuristics::accelerated_consumption(double& acceleration, double& current_rate,
|
||||
double avg_alloc_rate_words_per_second,
|
||||
double predicted_cycle_time) const
|
||||
{
|
||||
double *x_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
|
||||
double *y_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
|
||||
double x_sum = 0.0;
|
||||
double y_sum = 0.0;
|
||||
|
||||
assert(_spike_acceleration_num_samples > 0, "At minimum, we should have sample from this period");
|
||||
|
||||
double weighted_average_alloc;
|
||||
if (_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) {
|
||||
double weighted_y_sum = 0;
|
||||
double total_weight = 0;
|
||||
double previous_x = 0;
|
||||
uint delta = _spike_acceleration_num_samples - ShenandoahRateAccelerationSampleSize;
|
||||
for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
|
||||
uint index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
|
||||
x_array[i] = _spike_acceleration_rate_timestamps[index];
|
||||
x_sum += x_array[i];
|
||||
y_array[i] = _spike_acceleration_rate_samples[index];
|
||||
if (i > 0) {
|
||||
// first sample not included in weighted average because it has no weight.
|
||||
double sample_weight = x_array[i] - x_array[i-1];
|
||||
weighted_y_sum += y_array[i] * sample_weight;
|
||||
total_weight += sample_weight;
|
||||
}
|
||||
y_sum += y_array[i];
|
||||
}
|
||||
weighted_average_alloc = (total_weight > 0)? weighted_y_sum / total_weight: 0;
|
||||
} else {
|
||||
weighted_average_alloc = 0;
|
||||
}
|
||||
|
||||
double momentary_rate;
|
||||
if (_spike_acceleration_num_samples > ShenandoahMomentaryAllocationRateSpikeSampleSize) {
|
||||
// Num samples must be strictly greater than sample size, because we need one extra sample to compute rate and weights
|
||||
// In this context, the weight of a y value (an allocation rate) is the duration for which this allocation rate was
|
||||
// active (the time since previous y value was reported). An allocation rate measured over a span of 300 ms (e.g. during
|
||||
// concurrent GC) has much more "weight" than an allocation rate measured over a span of 15 ms.
|
||||
double weighted_y_sum = 0;
|
||||
double total_weight = 0;
|
||||
double sum_for_average = 0.0;
|
||||
uint delta = _spike_acceleration_num_samples - ShenandoahMomentaryAllocationRateSpikeSampleSize;
|
||||
for (uint i = 0; i < ShenandoahMomentaryAllocationRateSpikeSampleSize; i++) {
|
||||
uint sample_index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
|
||||
uint preceding_index = (sample_index == 0)? _spike_acceleration_buffer_size - 1: sample_index - 1;
|
||||
double sample_weight = (_spike_acceleration_rate_timestamps[sample_index]
|
||||
- _spike_acceleration_rate_timestamps[preceding_index]);
|
||||
weighted_y_sum += _spike_acceleration_rate_samples[sample_index] * sample_weight;
|
||||
total_weight += sample_weight;
|
||||
}
|
||||
momentary_rate = weighted_y_sum / total_weight;
|
||||
bool is_spiking = _allocation_rate.is_spiking(momentary_rate, _spike_threshold_sd);
|
||||
if (!is_spiking) {
|
||||
// Disable momentary spike trigger unless allocation rate delta from average exceeds sd
|
||||
momentary_rate = 0.0;
|
||||
}
|
||||
} else {
|
||||
momentary_rate = 0.0;
|
||||
}
|
||||
|
||||
// By default, use momentary_rate for current rate and zero acceleration. Overwrite iff best-fit line has positive slope.
|
||||
current_rate = momentary_rate;
|
||||
acceleration = 0.0;
|
||||
if ((_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize)
|
||||
&& (weighted_average_alloc >= avg_alloc_rate_words_per_second)) {
|
||||
// If the average rate across the acceleration samples is below the overall average, this sample is not eligible to
|
||||
// represent acceleration of allocation rate. We may just be catching up with allocations after a lull.
|
||||
|
||||
double *xy_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
|
||||
double *x2_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
|
||||
double xy_sum = 0.0;
|
||||
double x2_sum = 0.0;
|
||||
for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
|
||||
xy_array[i] = x_array[i] * y_array[i];
|
||||
xy_sum += xy_array[i];
|
||||
x2_array[i] = x_array[i] * x_array[i];
|
||||
x2_sum += x2_array[i];
|
||||
}
|
||||
// Find the best-fit least-squares linear representation of rate vs time
|
||||
double m; /* slope */
|
||||
double b; /* y-intercept */
|
||||
|
||||
m = ((ShenandoahRateAccelerationSampleSize * xy_sum - x_sum * y_sum)
|
||||
/ (ShenandoahRateAccelerationSampleSize * x2_sum - x_sum * x_sum));
|
||||
b = (y_sum - m * x_sum) / ShenandoahRateAccelerationSampleSize;
|
||||
|
||||
if (m > 0) {
|
||||
double proposed_current_rate = m * x_array[ShenandoahRateAccelerationSampleSize - 1] + b;
|
||||
acceleration = m;
|
||||
current_rate = proposed_current_rate;
|
||||
}
|
||||
// else, leave current_rate = momentary_rate, acceleration = 0
|
||||
}
|
||||
// and here also, leave current_rate = momentary_rate, acceleration = 0
|
||||
|
||||
double time_delta = get_planned_sleep_interval() + predicted_cycle_time;
|
||||
size_t words_to_be_consumed = (size_t) (current_rate * time_delta + 0.5 * acceleration * time_delta * time_delta);
|
||||
return words_to_be_consumed;
|
||||
}
|
||||
|
||||
ShenandoahAllocationRate::ShenandoahAllocationRate() :
|
||||
_last_sample_time(os::elapsedTime()),
|
||||
_last_sample_value(0),
|
||||
@ -363,7 +828,7 @@ ShenandoahAllocationRate::ShenandoahAllocationRate() :
|
||||
double ShenandoahAllocationRate::force_sample(size_t allocated, size_t &unaccounted_bytes_allocated) {
|
||||
const double MinSampleTime = 0.002; // Do not sample if time since last update is less than 2 ms
|
||||
double now = os::elapsedTime();
|
||||
double time_since_last_update = now -_last_sample_time;
|
||||
double time_since_last_update = now - _last_sample_time;
|
||||
if (time_since_last_update < MinSampleTime) {
|
||||
unaccounted_bytes_allocated = allocated - _last_sample_value;
|
||||
_last_sample_value = 0;
|
||||
@ -412,8 +877,10 @@ bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
|
||||
|
||||
double sd = _rate.sd();
|
||||
if (sd > 0) {
|
||||
// There is a small chance that that rate has already been sampled, but it
|
||||
// seems not to matter in practice.
|
||||
// There is a small chance that that rate has already been sampled, but it seems not to matter in practice.
|
||||
// Note that z_score reports how close the rate is to the average. A value between -1 and 1 means we are within one
|
||||
// standard deviation. A value between -3 and +3 means we are within 3 standard deviations. We only check for z_score
|
||||
// greater than threshold because we are looking for an allocation spike which is greater than the mean.
|
||||
double z_score = (rate - _rate.avg()) / sd;
|
||||
if (z_score > threshold) {
|
||||
return true;
|
||||
|
||||
@ -27,7 +27,9 @@
|
||||
#define SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP
|
||||
|
||||
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
|
||||
#include "gc/shenandoah/shenandoahFreeSet.hpp"
|
||||
#include "gc/shenandoah/shenandoahPhaseTimings.hpp"
|
||||
#include "gc/shenandoah/shenandoahRegulatorThread.hpp"
|
||||
#include "gc/shenandoah/shenandoahSharedVariables.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "utilities/numberSeq.hpp"
|
||||
@ -108,6 +110,26 @@ public:
|
||||
|
||||
virtual ~ShenandoahAdaptiveHeuristics();
|
||||
|
||||
virtual void initialize() override;
|
||||
|
||||
virtual void post_initialize() override;
|
||||
|
||||
virtual void adjust_penalty(intx step) override;
|
||||
|
||||
// At the end of GC(N), we idle GC until necessary to start the next GC. Compute the threshold of memory that can be allocated
|
||||
// before we need to start the next GC.
|
||||
void start_idle_span() override;
|
||||
|
||||
// Having observed a new allocation rate sample, add this to the acceleration history so that we can determine if allocation
|
||||
// rate is accelerating.
|
||||
void add_rate_to_acceleration_history(double timestamp, double rate);
|
||||
|
||||
// Compute and return the current allocation rate, the current rate of acceleration, and the amount of memory that we expect
|
||||
// to consume if we start GC right now and gc takes predicted_cycle_time to complete.
|
||||
size_t accelerated_consumption(double& acceleration, double& current_rate,
|
||||
double avg_rate_words_per_sec, double predicted_cycle_time) const;
|
||||
|
||||
|
||||
void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
|
||||
RegionData* data, size_t size,
|
||||
size_t actual_free) override;
|
||||
@ -136,6 +158,8 @@ public:
|
||||
const static double LOWEST_EXPECTED_AVAILABLE_AT_END;
|
||||
const static double HIGHEST_EXPECTED_AVAILABLE_AT_END;
|
||||
|
||||
const static size_t GC_TIME_SAMPLE_SIZE;
|
||||
|
||||
friend class ShenandoahAllocationRate;
|
||||
|
||||
// Used to record the last trigger that signaled to start a GC.
|
||||
@ -150,9 +174,19 @@ public:
|
||||
void adjust_margin_of_error(double amount);
|
||||
void adjust_spike_threshold(double amount);
|
||||
|
||||
// Returns number of words that can be allocated before we need to trigger next GC, given available in bytes.
// When available does not exceed _headroom_adjustment the result is 0; otherwise the excess over
// _headroom_adjustment is divided by HeapWordSize to express it in words.
|
||||
inline size_t allocatable(size_t available) const {
|
||||
return (available > _headroom_adjustment)? (available - _headroom_adjustment) / HeapWordSize: 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
ShenandoahAllocationRate _allocation_rate;
|
||||
|
||||
// Invocations of should_start_gc() happen approximately once per ms. Queries of allocation rate only happen if a
|
||||
// certain amount of time has passed since the previous query.
|
||||
size_t _allocated_at_previous_query;
|
||||
double _time_of_previous_allocation_query;
|
||||
|
||||
// The margin of error expressed in standard deviations to add to our
|
||||
// average cycle time and allocation rate. As this value increases we
|
||||
// tend to overestimate the rate at which mutators will deplete the
|
||||
@ -179,6 +213,48 @@ protected:
|
||||
// source of feedback to adjust trigger parameters.
|
||||
TruncatedSeq _available;
|
||||
|
||||
ShenandoahFreeSet* _free_set;
|
||||
|
||||
// This represents the time at which the allocation rate was most recently sampled for the purpose of detecting acceleration.
|
||||
double _previous_acceleration_sample_timestamp;
|
||||
size_t _total_allocations_at_start_of_idle;
|
||||
|
||||
// bytes of headroom at which we should trigger GC
|
||||
size_t _headroom_adjustment;
|
||||
|
||||
// Keep track of GC_TIME_SAMPLE_SIZE most recent concurrent GC cycle times
|
||||
uint _gc_time_first_sample_index;
|
||||
uint _gc_time_num_samples;
|
||||
double* const _gc_time_timestamps;
|
||||
double* const _gc_time_samples;
|
||||
double* const _gc_time_xy; // timestamp * sample
|
||||
double* const _gc_time_xx; // timestamp squared
|
||||
double _gc_time_sum_of_timestamps;
|
||||
double _gc_time_sum_of_samples;
|
||||
double _gc_time_sum_of_xy;
|
||||
double _gc_time_sum_of_xx;
|
||||
|
||||
double _gc_time_m; // slope
|
||||
double _gc_time_b; // y-intercept
|
||||
double _gc_time_sd; // sd on deviance from prediction
|
||||
|
||||
// In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to
|
||||
// detect when GC needs to trigger.
|
||||
void compute_headroom_adjustment() override;
|
||||
|
||||
void add_gc_time(double timestamp_at_start, double duration);
|
||||
void add_degenerated_gc_time(double timestamp_at_start, double duration);
|
||||
double predict_gc_time(double timestamp_at_start);
|
||||
|
||||
// Keep track of SPIKE_ACCELERATION_SAMPLE_SIZE most recent spike allocation rate measurements. Note that it is
|
||||
// typical to experience a small spike following end of GC cycle, as mutator threads refresh their TLABs. But
|
||||
// there is generally an abundance of memory at this time as well, so this will not generally trigger GC.
|
||||
uint _spike_acceleration_buffer_size;
|
||||
uint _spike_acceleration_first_sample_index;
|
||||
uint _spike_acceleration_num_samples;
|
||||
double* const _spike_acceleration_rate_samples; // holds rates in words/second
|
||||
double* const _spike_acceleration_rate_timestamps;
|
||||
|
||||
// A conservative minimum threshold of free space that we'll try to maintain when possible.
|
||||
// For example, we might trigger a concurrent gc if we are likely to drop below
|
||||
// this threshold, or we might consider this when dynamically resizing generations
|
||||
|
||||
@ -25,7 +25,6 @@
|
||||
|
||||
#include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp"
|
||||
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
|
||||
#include "gc/shenandoah/shenandoahCollectionSetPreselector.hpp"
|
||||
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
|
||||
#include "gc/shenandoah/shenandoahGeneration.hpp"
|
||||
#include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp"
|
||||
@ -52,6 +51,12 @@ static int compare_by_aged_live(AgedRegionData a, AgedRegionData b) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Post-initialization hook: runs the base ShenandoahHeuristics::post_initialize(), then records the heap's
// free set in _free_set and computes the headroom adjustment.
void ShenandoahGenerationalHeuristics::post_initialize() {
|
||||
ShenandoahHeuristics::post_initialize();
|
||||
_free_set = ShenandoahHeap::heap()->free_set();
|
||||
compute_headroom_adjustment();
|
||||
}
|
||||
|
||||
inline void assert_no_in_place_promotions() {
|
||||
#ifdef ASSERT
|
||||
class ShenandoahNoInPlacePromotions : public ShenandoahHeapRegionClosure {
|
||||
@ -70,20 +75,19 @@ ShenandoahGenerationalHeuristics::ShenandoahGenerationalHeuristics(ShenandoahGen
|
||||
}
|
||||
|
||||
void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectionSet* collection_set) {
|
||||
ShenandoahHeap* heap = ShenandoahHeap::heap();
|
||||
ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
|
||||
|
||||
assert(collection_set->is_empty(), "Collection set must be empty here");
|
||||
|
||||
_add_regions_to_old = 0;
|
||||
|
||||
// Seed the collection set with resource area-allocated
|
||||
// preselected regions, which are removed when we exit this scope.
|
||||
ShenandoahCollectionSetPreselector preselector(collection_set, heap->num_regions());
|
||||
ShenandoahInPlacePromotionPlanner in_place_promotions(heap);
|
||||
|
||||
// Find the amount that will be promoted, regions that will be promoted in
|
||||
// place, and preselected older regions that will be promoted by evacuation.
|
||||
compute_evacuation_budgets(heap);
|
||||
compute_evacuation_budgets(in_place_promotions, heap);
|
||||
|
||||
// Choose the collection set, including the regions preselected above for promotion into the old generation.
|
||||
filter_regions(collection_set);
|
||||
filter_regions(in_place_promotions, collection_set);
|
||||
|
||||
// Even if collection_set->is_empty(), we want to adjust budgets, making reserves available to mutator.
|
||||
adjust_evacuation_budgets(heap, collection_set);
|
||||
@ -102,7 +106,8 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio
|
||||
}
|
||||
}
|
||||
|
||||
void ShenandoahGenerationalHeuristics::compute_evacuation_budgets(ShenandoahHeap* const heap) {
|
||||
void ShenandoahGenerationalHeuristics::compute_evacuation_budgets(ShenandoahInPlacePromotionPlanner& in_place_promotions,
|
||||
ShenandoahHeap* const heap) {
|
||||
shenandoah_assert_generational();
|
||||
|
||||
ShenandoahOldGeneration* const old_generation = heap->old_generation();
|
||||
@ -200,7 +205,7 @@ void ShenandoahGenerationalHeuristics::compute_evacuation_budgets(ShenandoahHeap
|
||||
// If is_global(), we let garbage-first heuristic determine cset membership. Otherwise, we give priority
|
||||
// to tenurable regions by preselecting regions for promotion by evacuation (obtaining the live data to seed promoted_reserve).
|
||||
// This also identifies regions that will be promoted in place. These use the tenuring threshold.
|
||||
const size_t consumed_by_advance_promotion = select_aged_regions(_generation->is_global()? 0: old_promo_reserve);
|
||||
const size_t consumed_by_advance_promotion = select_aged_regions(in_place_promotions, _generation->is_global()? 0: old_promo_reserve);
|
||||
assert(consumed_by_advance_promotion <= old_promo_reserve, "Do not promote more than budgeted");
|
||||
|
||||
// The young evacuation reserve can be no larger than young_unaffiliated. Planning to evacuate into partially consumed
|
||||
@ -224,24 +229,21 @@ void ShenandoahGenerationalHeuristics::compute_evacuation_budgets(ShenandoahHeap
|
||||
// case of a GLOBAL gc. During choose_collection_set() of GLOBAL, old will be expanded on demand.
|
||||
}
|
||||
|
||||
void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* collection_set) {
|
||||
assert(collection_set->is_empty(), "Must be empty");
|
||||
|
||||
void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahInPlacePromotionPlanner& in_place_promotions,
|
||||
ShenandoahCollectionSet* collection_set) {
|
||||
auto heap = ShenandoahGenerationalHeap::heap();
|
||||
size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
const size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
// Check all pinned regions have updated status before choosing the collection set.
|
||||
heap->assert_pinned_region_status(_generation);
|
||||
|
||||
// Step 1. Build up the region candidates we care about, rejecting losers and accepting winners right away.
|
||||
|
||||
size_t num_regions = heap->num_regions();
|
||||
const size_t num_regions = heap->num_regions();
|
||||
|
||||
RegionData* candidates = _region_data;
|
||||
|
||||
size_t cand_idx = 0;
|
||||
size_t preselected_candidates = 0;
|
||||
|
||||
size_t total_garbage = 0;
|
||||
|
||||
@ -251,23 +253,12 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c
|
||||
size_t free = 0;
|
||||
size_t free_regions = 0;
|
||||
|
||||
// This counts number of humongous regions that we intend to promote in this cycle.
|
||||
size_t humongous_regions_promoted = 0;
|
||||
// This counts number of regular regions that will be promoted in place.
|
||||
size_t regular_regions_promoted_in_place = 0;
|
||||
// This counts bytes of memory used by regular regions to be promoted in place.
|
||||
size_t regular_regions_promoted_usage = 0;
|
||||
// This counts bytes of memory free in regular regions to be promoted in place.
|
||||
size_t regular_regions_promoted_free = 0;
|
||||
// This counts bytes of garbage memory in regular regions to be promoted in place.
|
||||
size_t regular_regions_promoted_garbage = 0;
|
||||
|
||||
for (size_t i = 0; i < num_regions; i++) {
|
||||
ShenandoahHeapRegion* region = heap->get_region(i);
|
||||
if (!_generation->contains(region)) {
|
||||
continue;
|
||||
}
|
||||
size_t garbage = region->garbage();
|
||||
const size_t garbage = region->garbage();
|
||||
total_garbage += garbage;
|
||||
if (region->is_empty()) {
|
||||
free_regions++;
|
||||
@ -279,85 +270,48 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c
|
||||
immediate_garbage += garbage;
|
||||
region->make_trash_immediate();
|
||||
} else {
|
||||
bool is_candidate;
|
||||
// This is our candidate for later consideration.
|
||||
if (collection_set->is_preselected(i)) {
|
||||
assert(heap->is_tenurable(region), "Preselection filter");
|
||||
is_candidate = true;
|
||||
preselected_candidates++;
|
||||
// Set garbage value to maximum value to force this into the sorted collection set.
|
||||
garbage = region_size_bytes;
|
||||
if (collection_set->is_in(i)) {
|
||||
assert(heap->is_tenurable(region), "Preselected region %zu must be tenurable", i);
|
||||
} else if (region->is_young() && heap->is_tenurable(region)) {
|
||||
// Note that for GLOBAL GC, region may be OLD, and OLD regions do not qualify for pre-selection
|
||||
|
||||
// This region is old enough to be promoted but it was not preselected, either because its garbage is below
|
||||
// old garbage threshold so it will be promoted in place, or because there is not sufficient room
|
||||
// in old gen to hold the evacuated copies of this region's live data. In both cases, we choose not to
|
||||
// This region is old enough to be promoted, but it was not preselected, either because its garbage is below
|
||||
// old garbage threshold so it will be promoted in place, or because there is insufficient room
|
||||
// in old gen to hold the evacuated copies of this region's live data. In either case, we choose not to
|
||||
// place this region into the collection set.
|
||||
if (region->get_top_before_promote() != nullptr) {
|
||||
// Region was included for promotion-in-place
|
||||
regular_regions_promoted_in_place++;
|
||||
regular_regions_promoted_usage += region->used_before_promote();
|
||||
regular_regions_promoted_free += region->free();
|
||||
regular_regions_promoted_garbage += region->garbage();
|
||||
}
|
||||
is_candidate = false;
|
||||
} else {
|
||||
is_candidate = true;
|
||||
}
|
||||
if (is_candidate) {
|
||||
// This is our candidate for later consideration.
|
||||
assert(region->get_top_before_promote() == nullptr, "Cannot add region %zu scheduled for in-place-promotion to the collection set", i);
|
||||
candidates[cand_idx].set_region_and_garbage(region, garbage);
|
||||
cand_idx++;
|
||||
}
|
||||
}
|
||||
} else if (region->is_humongous_start()) {
|
||||
// Reclaim humongous regions here, and count them as the immediate garbage
|
||||
#ifdef ASSERT
|
||||
bool reg_live = region->has_live();
|
||||
bool bm_live = _generation->complete_marking_context()->is_marked(cast_to_oop(region->bottom()));
|
||||
assert(reg_live == bm_live,
|
||||
"Humongous liveness and marks should agree. Region live: %s; Bitmap live: %s; Region Live Words: %zu",
|
||||
BOOL_TO_STR(reg_live), BOOL_TO_STR(bm_live), region->get_live_data_words());
|
||||
#endif
|
||||
DEBUG_ONLY(assert_humongous_mark_consistency(region));
|
||||
if (!region->has_live()) {
|
||||
heap->trash_humongous_region_at(region);
|
||||
|
||||
// Count only the start. Continuations would be counted on "trash" path
|
||||
immediate_regions++;
|
||||
immediate_garbage += garbage;
|
||||
} else {
|
||||
if (region->is_young() && heap->is_tenurable(region)) {
|
||||
oop obj = cast_to_oop(region->bottom());
|
||||
size_t humongous_regions = ShenandoahHeapRegion::required_regions(obj->size() * HeapWordSize);
|
||||
humongous_regions_promoted += humongous_regions;
|
||||
}
|
||||
}
|
||||
} else if (region->is_trash()) {
|
||||
// Count in just trashed collection set, during coalesced CM-with-UR
|
||||
// Count in just trashed humongous continuation regions
|
||||
immediate_regions++;
|
||||
immediate_garbage += garbage;
|
||||
}
|
||||
}
|
||||
heap->old_generation()->set_expected_humongous_region_promotions(humongous_regions_promoted);
|
||||
heap->old_generation()->set_expected_regular_region_promotions(regular_regions_promoted_in_place);
|
||||
log_info(gc, ergo)("Planning to promote in place %zu humongous regions and %zu"
|
||||
" regular regions, spanning a total of %zu used bytes",
|
||||
humongous_regions_promoted, regular_regions_promoted_in_place,
|
||||
humongous_regions_promoted * ShenandoahHeapRegion::region_size_bytes() +
|
||||
regular_regions_promoted_usage);
|
||||
|
||||
// Step 2. Look back at garbage statistics, and decide if we want to collect anything,
|
||||
// given the amount of immediately reclaimable garbage. If we do, figure out the collection set.
|
||||
assert(immediate_garbage <= total_garbage,
|
||||
"Cannot have more immediate garbage than total garbage: " PROPERFMT " vs " PROPERFMT,
|
||||
PROPERFMTARGS(immediate_garbage), PROPERFMTARGS(total_garbage));
|
||||
|
||||
assert (immediate_garbage <= total_garbage,
|
||||
"Cannot have more immediate garbage than total garbage: %zu%s vs %zu%s",
|
||||
byte_size_in_proper_unit(immediate_garbage), proper_unit_for_byte_size(immediate_garbage),
|
||||
byte_size_in_proper_unit(total_garbage), proper_unit_for_byte_size(total_garbage));
|
||||
|
||||
size_t immediate_percent = (total_garbage == 0) ? 0 : (immediate_garbage * 100 / total_garbage);
|
||||
bool doing_promote_in_place = (humongous_regions_promoted + regular_regions_promoted_in_place > 0);
|
||||
|
||||
if (doing_promote_in_place || (preselected_candidates > 0) || (immediate_percent <= ShenandoahImmediateThreshold)) {
|
||||
const size_t immediate_percent = (total_garbage == 0) ? 0 : (immediate_garbage * 100 / total_garbage);
|
||||
const bool has_preselected_regions = !collection_set->is_empty();
|
||||
if (has_preselected_regions || (immediate_percent <= ShenandoahImmediateThreshold)) {
|
||||
// Call the subclasses to add young-gen regions into the collection set.
|
||||
choose_collection_set_from_regiondata(collection_set, candidates, cand_idx, immediate_garbage + free);
|
||||
}
|
||||
@ -370,15 +324,15 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c
|
||||
|
||||
ShenandoahTracer::report_evacuation_info(collection_set,
|
||||
free_regions,
|
||||
humongous_regions_promoted,
|
||||
regular_regions_promoted_in_place,
|
||||
regular_regions_promoted_garbage,
|
||||
regular_regions_promoted_free,
|
||||
in_place_promotions.humongous_region_stats().count,
|
||||
in_place_promotions.regular_region_stats().count,
|
||||
in_place_promotions.regular_region_stats().garbage,
|
||||
in_place_promotions.regular_region_stats().free,
|
||||
immediate_regions,
|
||||
immediate_garbage);
|
||||
}
|
||||
|
||||
// Preselect for inclusion into the collection set all regions whose age is at or above tenure age and for which the
|
||||
// Select for inclusion into the collection set all regions whose age is at or above tenure age and for which the
|
||||
// garbage percentage exceeds a dynamically adjusted threshold (known as the old-garbage threshold percentage). We
|
||||
// identify these regions by setting the appropriate entry of the collection set's preselected regions array to true.
|
||||
// All entries are initialized to false before calling this function.
|
||||
@ -394,15 +348,13 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c
|
||||
// that this allows us to more accurately budget memory to hold the results of evacuation. Memory for evacuation
|
||||
// of aged regions must be reserved in the old generation. Memory for evacuation of all other regions must be
|
||||
// reserved in the young generation.
|
||||
size_t ShenandoahGenerationalHeuristics::select_aged_regions(const size_t old_promotion_reserve) {
|
||||
size_t ShenandoahGenerationalHeuristics::select_aged_regions(ShenandoahInPlacePromotionPlanner& in_place_promotions,
|
||||
const size_t old_promotion_reserve) {
|
||||
|
||||
// There should be no regions configured for subsequent in-place-promotions carried over from the previous cycle.
|
||||
assert_no_in_place_promotions();
|
||||
|
||||
auto const heap = ShenandoahGenerationalHeap::heap();
|
||||
ShenandoahFreeSet* free_set = heap->free_set();
|
||||
bool* const candidate_regions_for_promotion_by_copy = heap->collection_set()->preselected_regions();
|
||||
ShenandoahMarkingContext* const ctx = heap->marking_context();
|
||||
|
||||
size_t promo_potential = 0;
|
||||
size_t candidates = 0;
|
||||
@ -415,14 +367,21 @@ size_t ShenandoahGenerationalHeuristics::select_aged_regions(const size_t old_pr
|
||||
ResourceMark rm;
|
||||
AgedRegionData* sorted_regions = NEW_RESOURCE_ARRAY(AgedRegionData, num_regions);
|
||||
|
||||
ShenandoahInPlacePromotionPlanner in_place_promotions(heap);
|
||||
|
||||
for (idx_t i = 0; i < num_regions; i++) {
|
||||
ShenandoahHeapRegion* const r = heap->get_region(i);
|
||||
if (r->is_empty() || !r->has_live() || !r->is_young() || !r->is_regular()) {
|
||||
// skip over regions that aren't regular young with some live data
|
||||
if (r->is_empty() || !r->has_live() || !r->is_young()) {
|
||||
// skip over regions that aren't young with some live data
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!r->is_regular()) {
|
||||
if (r->is_humongous() && heap->is_tenurable(r)) {
|
||||
in_place_promotions.prepare(r);
|
||||
}
|
||||
// Nothing else to be done for humongous regions
|
||||
continue;
|
||||
}
|
||||
|
||||
if (heap->is_tenurable(r)) {
|
||||
if (in_place_promotions.is_eligible(r)) {
|
||||
// We prefer to promote this region in place because it has a small amount of garbage and a large usage.
|
||||
@ -464,7 +423,7 @@ size_t ShenandoahGenerationalHeuristics::select_aged_regions(const size_t old_pr
|
||||
// Subsequent regions may be selected if they have smaller live data.
|
||||
}
|
||||
|
||||
in_place_promotions.update_free_set();
|
||||
in_place_promotions.complete_planning();
|
||||
|
||||
// Sort in increasing order according to live data bytes. Note that candidates represents the number of regions
|
||||
// that qualify to be promoted by evacuation.
|
||||
@ -479,14 +438,14 @@ size_t ShenandoahGenerationalHeuristics::select_aged_regions(const size_t old_pr
|
||||
const size_t promotion_need = (size_t) (region_live_data * ShenandoahPromoEvacWaste);
|
||||
if (old_consumed + promotion_need <= old_promotion_reserve) {
|
||||
old_consumed += promotion_need;
|
||||
candidate_regions_for_promotion_by_copy[region->index()] = true;
|
||||
heap->collection_set()->add_region(region);
|
||||
selected_regions++;
|
||||
selected_live += region_live_data;
|
||||
} else {
|
||||
// We rejected this promotable region from the collection set because we had no room to hold its copy.
|
||||
// Add this region to promo potential for next GC.
|
||||
promo_potential += region_live_data;
|
||||
assert(!candidate_regions_for_promotion_by_copy[region->index()], "Shouldn't be selected");
|
||||
assert(!heap->collection_set()->is_in(region), "Region %zu shouldn't be in the collection set", region->index());
|
||||
}
|
||||
// We keep going even if one region is excluded from selection because we need to accumulate all eligible
|
||||
// regions that are not preselected into promo_potential
|
||||
@ -550,12 +509,12 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
|
||||
size_t young_evacuated = collection_set->get_live_bytes_in_untenurable_regions();
|
||||
size_t young_evacuated_reserve_used = (size_t) (ShenandoahEvacWaste * double(young_evacuated));
|
||||
|
||||
size_t total_young_available = young_generation->available_with_reserve() - _add_regions_to_old * region_size_bytes;;
|
||||
size_t total_young_available = young_generation->available_with_reserve() - _add_regions_to_old * region_size_bytes;
|
||||
assert(young_evacuated_reserve_used <= total_young_available, "Cannot evacuate (%zu) more than is available in young (%zu)",
|
||||
young_evacuated_reserve_used, total_young_available);
|
||||
young_generation->set_evacuation_reserve(young_evacuated_reserve_used);
|
||||
|
||||
// We have not yet rebuilt the free set. Some of the memory that is thought to be avaiable within old may no
|
||||
// We have not yet rebuilt the free set. Some of the memory that is thought to be available within old may no
|
||||
// longer be available if that memory had been free within regions that were selected for the collection set.
|
||||
// Make the necessary adjustments to old_available.
|
||||
size_t old_available =
|
||||
@ -634,24 +593,3 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
|
||||
old_generation->set_promoted_reserve(total_promotion_reserve);
|
||||
old_generation->reset_promoted_expended();
|
||||
}
|
||||
|
||||
size_t ShenandoahGenerationalHeuristics::add_preselected_regions_to_collection_set(ShenandoahCollectionSet* cset,
|
||||
const RegionData* data,
|
||||
size_t size) const {
|
||||
// cur_young_garbage represents the amount of memory to be reclaimed from young-gen. In the case that live objects
|
||||
// are known to be promoted out of young-gen, we count this as cur_young_garbage because this memory is reclaimed
|
||||
// from young-gen and becomes available to serve future young-gen allocation requests.
|
||||
size_t cur_young_garbage = 0;
|
||||
for (size_t idx = 0; idx < size; idx++) {
|
||||
ShenandoahHeapRegion* r = data[idx].get_region();
|
||||
if (cset->is_preselected(r->index())) {
|
||||
assert(ShenandoahGenerationalHeap::heap()->is_tenurable(r), "Preselected regions must have tenure age");
|
||||
// Entire region will be promoted, This region does not impact young-gen or old-gen evacuation reserve.
|
||||
// This region has been pre-selected and its impact on promotion reserve is already accounted for.
|
||||
cur_young_garbage += r->garbage();
|
||||
cset->add_region(r);
|
||||
}
|
||||
}
|
||||
return cur_young_garbage;
|
||||
}
|
||||
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
|
||||
|
||||
#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
|
||||
#include "gc/shenandoah/shenandoahInPlacePromoter.hpp"
|
||||
|
||||
class ShenandoahGeneration;
|
||||
class ShenandoahHeap;
|
||||
@ -49,9 +50,11 @@ public:
|
||||
|
||||
void choose_collection_set(ShenandoahCollectionSet* collection_set) override;
|
||||
|
||||
virtual void post_initialize() override;
|
||||
|
||||
private:
|
||||
// Compute evacuation budgets prior to choosing collection set.
|
||||
void compute_evacuation_budgets(ShenandoahHeap* const heap);
|
||||
void compute_evacuation_budgets(ShenandoahInPlacePromotionPlanner& in_place_promotions, ShenandoahHeap* const heap);
|
||||
|
||||
// Preselect for possible inclusion into the collection set exactly the most
|
||||
// garbage-dense regions, including those that satisfy criteria 1 & 2 below,
|
||||
@ -68,10 +71,10 @@ private:
|
||||
// regions, which are marked in the preselected_regions() indicator
|
||||
// array of the heap's collection set, which should be initialized
|
||||
// to false.
|
||||
size_t select_aged_regions(const size_t old_promotion_reserve);
|
||||
size_t select_aged_regions(ShenandoahInPlacePromotionPlanner& in_place_promotions, const size_t old_promotion_reserve);
|
||||
|
||||
// Filter and sort remaining regions before adding to collection set.
|
||||
void filter_regions(ShenandoahCollectionSet* collection_set);
|
||||
void filter_regions(ShenandoahInPlacePromotionPlanner& in_place_promotions, ShenandoahCollectionSet* collection_set);
|
||||
|
||||
// Adjust evacuation budgets after choosing collection set. The argument regions_to_xfer
|
||||
// represents regions to be transferred to old based on decisions made in top_off_collection_set()
|
||||
@ -82,10 +85,6 @@ protected:
|
||||
ShenandoahGeneration* _generation;
|
||||
|
||||
size_t _add_regions_to_old;
|
||||
|
||||
size_t add_preselected_regions_to_collection_set(ShenandoahCollectionSet* cset,
|
||||
const RegionData* data,
|
||||
size_t size) const;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -127,7 +127,7 @@ void ShenandoahGlobalHeuristics::choose_global_collection_set(ShenandoahCollecti
|
||||
size_t cur_garbage = cur_young_garbage;
|
||||
for (size_t idx = 0; idx < size; idx++) {
|
||||
ShenandoahHeapRegion* r = data[idx].get_region();
|
||||
assert(!cset->is_preselected(r->index()), "There should be no preselected regions during GLOBAL GC");
|
||||
assert(!cset->is_in(r->index()), "Region (%zu) should not be in the collection set", r->index());
|
||||
bool add_region = false;
|
||||
size_t region_garbage = r->garbage();
|
||||
size_t new_garbage = cur_garbage + region_garbage;
|
||||
|
||||
@ -46,13 +46,16 @@ int ShenandoahHeuristics::compare_by_garbage(RegionData a, RegionData b) {
|
||||
}
|
||||
|
||||
ShenandoahHeuristics::ShenandoahHeuristics(ShenandoahSpaceInfo* space_info) :
|
||||
_most_recent_trigger_evaluation_time(os::elapsedTime()),
|
||||
_most_recent_planned_sleep_interval(0.0),
|
||||
_start_gc_is_pending(false),
|
||||
_declined_trigger_count(0),
|
||||
_most_recent_declined_trigger_count(0),
|
||||
_space_info(space_info),
|
||||
_region_data(nullptr),
|
||||
_guaranteed_gc_interval(0),
|
||||
_cycle_start(os::elapsedTime()),
|
||||
_precursor_cycle_start(os::elapsedTime()),
|
||||
_cycle_start(_precursor_cycle_start),
|
||||
_last_cycle_end(0),
|
||||
_gc_times_learned(0),
|
||||
_gc_time_penalties(0),
|
||||
@ -119,13 +122,7 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
|
||||
}
|
||||
} else if (region->is_humongous_start()) {
|
||||
// Reclaim humongous regions here, and count them as the immediate garbage
|
||||
#ifdef ASSERT
|
||||
bool reg_live = region->has_live();
|
||||
bool bm_live = heap->global_generation()->complete_marking_context()->is_marked(cast_to_oop(region->bottom()));
|
||||
assert(reg_live == bm_live,
|
||||
"Humongous liveness and marks should agree. Region live: %s; Bitmap live: %s; Region Live Words: %zu",
|
||||
BOOL_TO_STR(reg_live), BOOL_TO_STR(bm_live), region->get_live_data_words());
|
||||
#endif
|
||||
DEBUG_ONLY(assert_humongous_mark_consistency(region));
|
||||
if (!region->has_live()) {
|
||||
heap->trash_humongous_region_at(region);
|
||||
|
||||
@ -134,7 +131,7 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
|
||||
immediate_garbage += garbage;
|
||||
}
|
||||
} else if (region->is_trash()) {
|
||||
// Count in just trashed collection set, during coalesced CM-with-UR
|
||||
// Count in just trashed humongous continuation regions
|
||||
immediate_regions++;
|
||||
immediate_garbage += garbage;
|
||||
}
|
||||
@ -142,13 +139,11 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
|
||||
|
||||
// Step 2. Look back at garbage statistics, and decide if we want to collect anything,
|
||||
// given the amount of immediately reclaimable garbage. If we do, figure out the collection set.
|
||||
assert(immediate_garbage <= total_garbage,
|
||||
"Cannot have more immediate garbage than total garbage: " PROPERFMT " vs " PROPERFMT,
|
||||
PROPERFMTARGS(immediate_garbage), PROPERFMTARGS(total_garbage));
|
||||
|
||||
assert (immediate_garbage <= total_garbage,
|
||||
"Cannot have more immediate garbage than total garbage: %zu%s vs %zu%s",
|
||||
byte_size_in_proper_unit(immediate_garbage), proper_unit_for_byte_size(immediate_garbage),
|
||||
byte_size_in_proper_unit(total_garbage), proper_unit_for_byte_size(total_garbage));
|
||||
|
||||
size_t immediate_percent = (total_garbage == 0) ? 0 : (immediate_garbage * 100 / total_garbage);
|
||||
const size_t immediate_percent = (total_garbage == 0) ? 0 : (immediate_garbage * 100 / total_garbage);
|
||||
|
||||
if (immediate_percent <= ShenandoahImmediateThreshold) {
|
||||
choose_collection_set_from_regiondata(collection_set, candidates, cand_idx, immediate_garbage + free);
|
||||
@ -156,6 +151,19 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
|
||||
collection_set->summarize(total_garbage, immediate_garbage, immediate_regions);
|
||||
}
|
||||
|
||||
void ShenandoahHeuristics::start_idle_span() {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
void ShenandoahHeuristics::record_degenerated_cycle_start(bool out_of_cycle) {
|
||||
if (out_of_cycle) {
|
||||
_precursor_cycle_start = _cycle_start = os::elapsedTime();
|
||||
} else {
|
||||
_precursor_cycle_start = _cycle_start;
|
||||
_cycle_start = os::elapsedTime();
|
||||
}
|
||||
}
|
||||
|
||||
void ShenandoahHeuristics::record_cycle_start() {
|
||||
_cycle_start = os::elapsedTime();
|
||||
}
|
||||
@ -197,7 +205,6 @@ bool ShenandoahHeuristics::should_degenerate_cycle() {
|
||||
void ShenandoahHeuristics::adjust_penalty(intx step) {
|
||||
assert(0 <= _gc_time_penalties && _gc_time_penalties <= 100,
|
||||
"In range before adjustment: %zd", _gc_time_penalties);
|
||||
|
||||
if ((_most_recent_declined_trigger_count <= Penalty_Free_Declinations) && (step > 0)) {
|
||||
// Don't penalize if heuristics are not responsible for a negative outcome. Allow Penalty_Free_Declinations following
|
||||
// previous GC for self calibration without penalty.
|
||||
@ -274,6 +281,30 @@ void ShenandoahHeuristics::initialize() {
|
||||
// Nothing to do by default.
|
||||
}
|
||||
|
||||
void ShenandoahHeuristics::post_initialize() {
|
||||
// Nothing to do by default.
|
||||
}
|
||||
|
||||
double ShenandoahHeuristics::elapsed_cycle_time() const {
|
||||
return os::elapsedTime() - _cycle_start;
|
||||
}
|
||||
|
||||
|
||||
// Includes the time spent in abandoned concurrent GC cycle that may have triggered this degenerated cycle.
|
||||
double ShenandoahHeuristics::elapsed_degenerated_cycle_time() const {
|
||||
double now = os::elapsedTime();
|
||||
return now - _precursor_cycle_start;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void ShenandoahHeuristics::assert_humongous_mark_consistency(ShenandoahHeapRegion* region) {
|
||||
assert(region->is_humongous(), "Region %zu must be humongous", region->index());
|
||||
const oop humongous_oop = cast_to_oop(region->bottom());
|
||||
ShenandoahGeneration* generation = ShenandoahHeap::heap()->generation_for(region->affiliation());
|
||||
const bool bm_live = generation->complete_marking_context()->is_marked(humongous_oop);
|
||||
const bool reg_live = region->has_live();
|
||||
assert(reg_live == bm_live,
|
||||
"Humongous liveness and marks should agree. Region live: %s; Bitmap live: %s; Region Live Words: %zu",
|
||||
BOOL_TO_STR(reg_live), BOOL_TO_STR(bm_live), region->get_live_data_words());
|
||||
}
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user