Merge branch 'openjdk:master' into JDK-8370196

Zihao Lin 2025-12-02 23:37:48 +08:00 committed by GitHub
commit 30fa1f0380
398 changed files with 10129 additions and 7659 deletions

@ -50,7 +50,14 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
# add -z,relro (mark relocations read only) for all libs
# add -z,now ("full relro" - more of the Global Offset Table GOT is marked read only)
# add --no-as-needed to disable default --as-needed link flag on some GCC toolchains
# add --icf=all (Identical Code Folding — merges identical functions)
BASIC_LDFLAGS="-Wl,-z,defs -Wl,-z,relro -Wl,-z,now -Wl,--no-as-needed -Wl,--exclude-libs,ALL"
if test "x$LINKER_TYPE" = "xgold"; then
if test x$DEBUG_LEVEL = xrelease; then
BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--icf=all"
fi
fi
# Linux : remove unused code+data in link step
if test "x$ENABLE_LINKTIME_GC" = xtrue; then
if test "x$OPENJDK_TARGET_CPU" = xs390x; then

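As an aside on what the new gold-only flag buys: a minimal C++ sketch of identical code folding (illustrative only, not JDK code; the function names are made up). Under -Wl,--icf=all the linker may merge byte-for-byte identical functions into a single copy, so their addresses can compare equal at run time; the change above restricts the flag to release builds with the gold linker.

// Illustrative sketch: build with g++ -O2 -fuse-ld=gold -Wl,--icf=all
#include <cstdio>

static int add_a(int x, int y) { return x + y; }  // identical bodies,
static int add_b(int x, int y) { return x + y; }  // candidates for folding

int main() {
  // With ICF the two pointers may compare equal; without it they differ.
  std::printf("folded to one address? %s\n", add_a == add_b ? "yes" : "no");
  return 0;
}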
@ -516,6 +516,7 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_LD_VERSION],
if [ [[ "$LINKER_VERSION_STRING" == *gold* ]] ]; then
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*) .*/\1/'` ]
LINKER_TYPE=gold
else
[ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
$SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*/\1/'` ]

@ -170,6 +170,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_vector.ad \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_atomic.ad \
)))
endif

@ -237,7 +237,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
DISABLED_WARNINGS_gcc_dgif_lib.c := sign-compare, \
DISABLED_WARNINGS_gcc_jcmaster.c := implicit-fallthrough, \
DISABLED_WARNINGS_gcc_jdphuff.c := shift-negative-value, \
DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized unused-function, \
DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized, \
DISABLED_WARNINGS_gcc_pngerror.c := maybe-uninitialized, \
DISABLED_WARNINGS_gcc_splashscreen_gfx_impl.c := implicit-fallthrough \
maybe-uninitialized, \
@ -248,7 +248,6 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
DISABLED_WARNINGS_clang := deprecated-non-prototype, \
DISABLED_WARNINGS_clang_dgif_lib.c := sign-compare, \
DISABLED_WARNINGS_clang_gzwrite.c := format-nonliteral, \
DISABLED_WARNINGS_clang_png.c := unused-function, \
DISABLED_WARNINGS_clang_splashscreen_impl.c := sign-compare \
unused-but-set-variable unused-function, \
DISABLED_WARNINGS_clang_splashscreen_png.c := \

File diff suppressed because it is too large.

@ -0,0 +1,909 @@
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
// BEGIN This file is automatically generated. Do not edit --------------
// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// STLXR.
// This section is generated from aarch64_atomic_ad.m4
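As a plain-C++ point of comparison for the rationale above (a sketch, not HotSpot code and not part of the generated file): the "volatile" case corresponds to a sequentially consistent compare-exchange, which AArch64 compilers typically lower to an acquiring load-exclusive paired with a releasing store-exclusive (or a CASAL), so the store side always carries release semantics, matching the hard-coded /*release*/ true in the patterns below.

#include <atomic>

// Sketch only: seq_cst ordering on both the success and the failure path.
bool cas_seq_cst(std::atomic<long>& cell, long expected, long desired) {
  return cell.compare_exchange_strong(expected, desired,
                                      std::memory_order_seq_cst);
}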
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgb $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
__ sxtbw($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgs $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
__ sxthw($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgw $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
__ sxtbw($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
__ sxthw($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgb $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgs $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapB mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapS mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapI mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (CompareAndSwapL mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (CompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (CompareAndSwapP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ false, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgb_weak $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgs_weak $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_weak $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_weak $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_weak $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(2*VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_weak $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ false, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgb_acq_weak $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::byte, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgs_acq_weak $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::halfword, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_acq_weak $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_acq_weak $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchgw_acq_weak $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::word, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
ins_cost(VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg_acq_weak $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::xword, /*acquire*/ true, /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}
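The Weak* patterns above pass /*weak*/ true to cmpxchg, i.e. a CAS that may fail spuriously, leaving the retry to the caller. A hedged std::atomic sketch of that contract (illustrative, not HotSpot code):

#include <atomic>

// A weak CAS can report failure even though the value matched, so the
// caller loops; the strong patterns earlier in this file do not need to.
long cas_weak_retry(std::atomic<long>& cell, long expected, long desired) {
  long observed = expected;
  while (!cell.compare_exchange_weak(observed, desired)) {
    if (observed != expected) {
      break;              // genuine mismatch: stop and report what we saw
    }
    observed = expected;  // spurious failure: retry with the same expected
  }
  return observed;        // equals 'expected' exactly when the swap happened
}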
instruct getAndSetI(indirect mem, iRegI newval, iRegINoSp oldval) %{
match(Set oldval (GetAndSetI mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchgw $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetL(indirect mem, iRegL newval, iRegLNoSp oldval) %{
match(Set oldval (GetAndSetL mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchg $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchg($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetN(indirect mem, iRegN newval, iRegNNoSp oldval) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (GetAndSetN mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchgw $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetP(indirect mem, iRegP newval, iRegPNoSp oldval) %{
predicate(n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (GetAndSetP mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchg $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchg($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetIAcq(indirect mem, iRegI newval, iRegINoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set oldval (GetAndSetI mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetLAcq(indirect mem, iRegL newval, iRegLNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set oldval (GetAndSetL mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetNAcq(indirect mem, iRegN newval, iRegNNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
match(Set oldval (GetAndSetN mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp oldval) %{
predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
match(Set oldval (GetAndSetP mem newval));
ins_cost(2*VOLATILE_REF_COST);
format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
match(Set newval (GetAndAddI mem incr));
ins_cost(2*VOLATILE_REF_COST+1);
format %{ "get_and_addI $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set newval (GetAndAddI mem incr));
ins_cost(VOLATILE_REF_COST+1);
format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddINoRes(indirect mem, Universe dummy, iRegIorL2I incr) %{
predicate(n->as_LoadStore()->result_not_used());
match(Set dummy (GetAndAddI mem incr));
ins_cost(2*VOLATILE_REF_COST);
format %{ "get_and_addI noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddIAcqNoRes(indirect mem, Universe dummy, iRegIorL2I incr) %{
predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
match(Set dummy (GetAndAddI mem incr));
ins_cost(VOLATILE_REF_COST);
format %{ "get_and_addI_acq noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddIConst(indirect mem, iRegINoSp newval, immIAddSub incr) %{
match(Set newval (GetAndAddI mem incr));
ins_cost(2*VOLATILE_REF_COST+1);
format %{ "get_and_addI $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddIAcqConst(indirect mem, iRegINoSp newval, immIAddSub incr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set newval (GetAndAddI mem incr));
ins_cost(VOLATILE_REF_COST+1);
format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddINoResConst(indirect mem, Universe dummy, immIAddSub incr) %{
predicate(n->as_LoadStore()->result_not_used());
match(Set dummy (GetAndAddI mem incr));
ins_cost(2*VOLATILE_REF_COST);
format %{ "get_and_addI noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddIAcqNoResConst(indirect mem, Universe dummy, immIAddSub incr) %{
predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
match(Set dummy (GetAndAddI mem incr));
ins_cost(VOLATILE_REF_COST);
format %{ "get_and_addI_acq noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddL(indirect mem, iRegLNoSp newval, iRegL incr) %{
match(Set newval (GetAndAddL mem incr));
ins_cost(2*VOLATILE_REF_COST+1);
format %{ "get_and_addL $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set newval (GetAndAddL mem incr));
ins_cost(VOLATILE_REF_COST+1);
format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLNoRes(indirect mem, Universe dummy, iRegL incr) %{
predicate(n->as_LoadStore()->result_not_used());
match(Set dummy (GetAndAddL mem incr));
ins_cost(2*VOLATILE_REF_COST);
format %{ "get_and_addL noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLAcqNoRes(indirect mem, Universe dummy, iRegL incr) %{
predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
match(Set dummy (GetAndAddL mem incr));
ins_cost(VOLATILE_REF_COST);
format %{ "get_and_addL_acq noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLConst(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
match(Set newval (GetAndAddL mem incr));
ins_cost(2*VOLATILE_REF_COST+1);
format %{ "get_and_addL $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLAcqConst(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
predicate(needs_acquiring_load_exclusive(n));
match(Set newval (GetAndAddL mem incr));
ins_cost(VOLATILE_REF_COST+1);
format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
ins_encode %{
__ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLNoResConst(indirect mem, Universe dummy, immLAddSub incr) %{
predicate(n->as_LoadStore()->result_not_used());
match(Set dummy (GetAndAddL mem incr));
ins_cost(2*VOLATILE_REF_COST);
format %{ "get_and_addL noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}
instruct getAndAddLAcqNoResConst(indirect mem, Universe dummy, immLAddSub incr) %{
predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
match(Set dummy (GetAndAddL mem incr));
ins_cost(VOLATILE_REF_COST);
format %{ "get_and_addL_acq noreg, [$mem], $incr" %}
ins_encode %{
__ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}

@ -0,0 +1,246 @@
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
// BEGIN This file is automatically generated. Do not edit --------------
// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// STLXR.
// This section is generated from aarch64_atomic_ad.m4
dnl Return Arg1 with two spaces before it. We need this because m4
dnl strips leading spaces from macro args.
define(`INDENT', ` $1')dnl
dnl
dnl
dnl
dnl ====================== CompareAndExchange*
dnl
define(`CAE_INSN1',
`
instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($7,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ins_cost(`'ifelse($7,Acq,,2*)VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg$5`'ifelse($7,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($7,Acq,true,false), /*release*/ true,
/*weak*/ false, $res$$Register);
__ $6($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}')dnl
define(`CAE_INSN2',
`
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
$1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
CAE_INSN1(B, I, byte, byte, b, sxtbw, )
CAE_INSN1(S, I, short, halfword, s, sxthw, )
CAE_INSN2(I, I, int, word, w, , )
CAE_INSN2(L, L, long, xword, , , )
CAE_INSN2(N, N, narrow oop, word, w, , )
CAE_INSN2(P, P, ptr, xword, , , )
dnl
CAE_INSN1(B, I, byte, byte, b, sxtbw, Acq)
CAE_INSN1(S, I, short, halfword, s, sxthw, Acq)
CAE_INSN2(I, I, int, word, w, Acq)
CAE_INSN2(L, L, long, xword, , Acq)
CAE_INSN2(N, N, narrow oop, word, w, Acq)
CAE_INSN2(P, P, ptr, xword, , Acq)
dnl
dnl
dnl
dnl ====================== (Weak)CompareAndSwap*
dnl
define(`CAS_INSN1',
`
instruct ifelse($7,Weak,'weakCompare`,'compare`)AndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
match(Set res ($7CompareAndSwap$1 mem (Binary oldval newval)));
ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,)`'ifelse($7,Weak,_weak) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ ifelse($7,Weak,true,false), noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
define(`CAS_INSN2',
`
instruct ifelse($7,Weak,'weakCompare`,'compare`)AndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
$1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res ($7CompareAndSwap$1 mem (Binary oldval newval)));
ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
effect(KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,)`'ifelse($7,Weak,_weak) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ ifelse($7,Weak,true,false), noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
CAS_INSN1(B, I, byte, byte, b, , )
CAS_INSN1(S, I, short, halfword, s, , )
CAS_INSN2(I, I, int, word, w, , )
CAS_INSN2(L, L, long, xword, , , )
CAS_INSN2(N, N, narrow oop, word, w, , )
CAS_INSN2(P, P, ptr, xword, , , )
dnl
CAS_INSN1(B, I, byte, byte, b, Acq, )
CAS_INSN1(S, I, short, halfword, s, Acq, )
CAS_INSN2(I, I, int, word, w, Acq, )
CAS_INSN2(L, L, long, xword, , Acq, )
CAS_INSN2(N, N, narrow oop, word, w, Acq, )
CAS_INSN2(P, P, ptr, xword, , Acq, )
dnl
CAS_INSN1(B, I, byte, byte, b, , Weak)
CAS_INSN1(S, I, short, halfword, s, , Weak)
CAS_INSN2(I, I, int, word, w, , Weak)
CAS_INSN2(L, L, long, xword, , , Weak)
CAS_INSN2(N, N, narrow oop, word, w, , Weak)
CAS_INSN2(P, P, ptr, xword, , , Weak)
dnl
CAS_INSN1(B, I, byte, byte, b, Acq, Weak)
CAS_INSN1(S, I, short, halfword, s, Acq, Weak)
CAS_INSN2(I, I, int, word, w, Acq, Weak)
CAS_INSN2(L, L, long, xword, , Acq, Weak)
CAS_INSN2(N, N, narrow oop, word, w, Acq, Weak)
CAS_INSN2(P, P, ptr, xword, , Acq, Weak)
dnl
dnl
dnl
dnl ====================== GetAndSet*
dnl
define(`GAS_INSN1',
`
instruct getAndSet$1$3(indirect mem, iReg$1 newval, iReg$1NoSp oldval) %{
ifelse($1$3,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
$1$3,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$3,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set oldval (GetAndSet$1 mem newval));
ins_cost(`'ifelse($4,Acq,,2*)VOLATILE_REF_COST);
format %{ "atomic_xchg$2`'ifelse($3,Acq,_acq) $oldval, $newval, [$mem]" %}
ins_encode %{
__ atomic_xchg`'ifelse($3,Acq,al)$2($oldval$$Register, $newval$$Register, as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}')dnl
dnl
GAS_INSN1(I, w, )
GAS_INSN1(L, , )
GAS_INSN1(N, w, )
GAS_INSN1(P, , )
dnl
GAS_INSN1(I, w, Acq)
GAS_INSN1(L, , Acq)
GAS_INSN1(N, w, Acq)
GAS_INSN1(P, , Acq)
dnl
dnl
dnl
dnl ====================== GetAndAdd*
dnl
define(`GAA_INSN1',
`
instruct getAndAdd$1$4$5$6(indirect mem, `'ifelse($5,NoRes,Universe dummy,iReg$1NoSp newval), `'ifelse($6,Const,imm$1AddSub incr,iReg$2 incr)) %{
ifelse($4$5,AcqNoRes,INDENT(predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));),
$5,NoRes,INDENT(predicate(n->as_LoadStore()->result_not_used());),
$4,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set ifelse($5,NoRes,dummy,newval) (GetAndAdd$1 mem incr));
ins_cost(`'ifelse($4,Acq,,2*)VOLATILE_REF_COST`'ifelse($5,NoRes,,+1));
format %{ "get_and_add$1`'ifelse($4,Acq,_acq) `'ifelse($5,NoRes,noreg,$newval), [$mem], $incr" %}
ins_encode %{
__ atomic_add`'ifelse($4,Acq,al)$3(`'ifelse($5,NoRes,noreg,$newval$$Register), `'ifelse($6,Const,$incr$$constant,$incr$$Register), as_Register($mem$$base));
%}
ins_pipe(pipe_serial);
%}')dnl
dnl
dnl
GAA_INSN1(I, IorL2I, w, , , )
GAA_INSN1(I, IorL2I, w, Acq, , )
GAA_INSN1(I, IorL2I, w, , NoRes, )
GAA_INSN1(I, IorL2I, w, Acq, NoRes, )
GAA_INSN1(I, I, w, , , Const)
GAA_INSN1(I, I, w, Acq, , Const)
GAA_INSN1(I, I, w, , NoRes, Const)
GAA_INSN1(I, I, w, Acq, NoRes, Const)
dnl
GAA_INSN1(L, L, , , , )
GAA_INSN1(L, L, , Acq, , )
GAA_INSN1(L, L, , , NoRes, )
GAA_INSN1(L, L, , Acq, NoRes, )
GAA_INSN1(L, L, , , , Const)
GAA_INSN1(L, L, , Acq, , Const)
GAA_INSN1(L, L, , , NoRes, Const)
GAA_INSN1(L, L, , Acq, NoRes, Const)
dnl

@ -1,161 +0,0 @@
dnl Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it
dnl under the terms of the GNU General Public License version 2 only, as
dnl published by the Free Software Foundation.
dnl
dnl This code is distributed in the hope that it will be useful, but WITHOUT
dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl version 2 for more details (a copy is included in the LICENSE file that
dnl accompanied this code).
dnl
dnl You should have received a copy of the GNU General Public License version
dnl 2 along with this work; if not, write to the Free Software Foundation,
dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
dnl
dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
dnl
dnl Process this file with m4 cas.m4 to generate the CAE and wCAS
dnl instructions used in aarch64.ad.
dnl
// BEGIN This section of the file is automatically generated. Do not edit --------------
// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// STLXR.
// This section is generated from cas.m4
dnl Return Arg1 with two spaces before it. We need this because m4
dnl strips leading spaces from macro args.
define(`INDENT', ` $1')dnl
dnl
define(`CAS_INSN',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
$1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ false, $res$$Register);
%}
ins_pipe(pipe_slow);
%}')dnl
define(`CAS_INSN4',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($7,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($7,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
effect(TEMP_DEF res, KILL cr);
format %{
"cmpxchg$5`'ifelse($7,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($7,Acq,true,false), /*release*/ true,
/*weak*/ false, $res$$Register);
__ $6($res$$Register, $res$$Register);
%}
ins_pipe(pipe_slow);
%}')dnl
CAS_INSN4(B,I,byte,byte,b,sxtbw)
CAS_INSN4(S,I,short,halfword,s,sxthw)
CAS_INSN(I,I,int,word,w)
CAS_INSN(L,L,long,xword)
CAS_INSN(N,N,narrow oop,word,w)
CAS_INSN(P,P,ptr,xword)
dnl
CAS_INSN4(B,I,byte,byte,b,sxtbw,Acq)
CAS_INSN4(S,I,short,halfword,s,sxthw,Acq)
CAS_INSN(I,I,int,word,w,Acq)
CAS_INSN(L,L,long,xword,,Acq)
CAS_INSN(N,N,narrow oop,word,w,Acq)
CAS_INSN(P,P,ptr,xword,,Acq)
dnl
define(`CAS_INSN2',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
effect(KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}')dnl
define(`CAS_INSN3',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
$1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
$1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
$6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
`dnl')
match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
effect(KILL cr);
format %{
"cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
"csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
%}
ins_encode %{
__ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
/*weak*/ true, noreg);
__ csetw($res$$Register, Assembler::EQ);
%}
ins_pipe(pipe_slow);
%}')dnl
CAS_INSN2(B,I,byte,byte,b)
CAS_INSN2(S,I,short,halfword,s)
CAS_INSN3(I,I,int,word,w)
CAS_INSN3(L,L,long,xword)
CAS_INSN3(N,N,narrow oop,word,w)
CAS_INSN3(P,P,ptr,xword)
CAS_INSN2(B,I,byte,byte,b,Acq)
CAS_INSN2(S,I,short,halfword,s,Acq)
CAS_INSN3(I,I,int,word,w,Acq)
CAS_INSN3(L,L,long,xword,,Acq)
CAS_INSN3(N,N,narrow oop,word,w,Acq)
CAS_INSN3(P,P,ptr,xword,,Acq)
dnl
// END This section of the file is automatically generated. Do not edit --------------

@ -1063,6 +1063,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
return false;
}
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;

@ -157,6 +157,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return result;
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

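The new one-byte specialization above delegates to XchgUsingCmpxchg, which, as the name suggests, builds exchange out of compare-and-swap. A simplified sketch of that technique (illustrative; not the actual HotSpot helper):

#include <atomic>

template <typename T>
T xchg_via_cas(std::atomic<T>& dest, T new_value) {
  T old = dest.load(std::memory_order_relaxed);
  // compare_exchange_weak refreshes 'old' with the current value whenever
  // it fails, so the loop converges on a successful swap.
  while (!dest.compare_exchange_weak(old, new_value,
                                     std::memory_order_seq_cst,
                                     std::memory_order_relaxed)) {
  }
  return old;  // the previous value, which an exchange must return
}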
@ -2383,6 +2383,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
return false;
}
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;

@ -2053,6 +2053,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
return false;
}
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;

@ -1865,6 +1865,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
return false;
}
bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
return false;
}
bool Matcher::is_generic_vector(MachOper* opnd) {
ShouldNotReachHere(); // generic vector operands not supported
return false;

@ -89,10 +89,10 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
Label done;
Label L_done;
__ testptr(count, count);
__ jcc(Assembler::zero, done);
__ jccb(Assembler::zero, L_done);
// Calculate end address in "count".
Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
@ -111,31 +111,31 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
__ shrptr(count, CardTable::card_shift());
__ addptr(count, tmp);
Label loop;
Label L_loop;
// Iterate from start card to end card (inclusive).
__ bind(loop);
__ bind(L_loop);
Label is_clean_card;
Label L_is_clean_card;
if (UseCondCardMark) {
__ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
__ jcc(Assembler::equal, is_clean_card);
__ jccb(Assembler::equal, L_is_clean_card);
} else {
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
}
Label next_card;
__ bind(next_card);
Label L_next_card;
__ bind(L_next_card);
__ addptr(addr, sizeof(CardTable::CardValue));
__ cmpptr(addr, count);
__ jcc(Assembler::belowEqual, loop);
__ jmp(done);
__ jccb(Assembler::belowEqual, L_loop);
__ jmpb(L_done);
__ bind(is_clean_card);
// Card was clean. Dirty card and go to next..
__ bind(L_is_clean_card);
// Card was clean. Dirty card and go to next.
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
__ jmp(next_card);
__ jmpb(L_next_card);
__ bind(done);
__ bind(L_done);
}
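A plain-C++ outline of the card-range walk above (simplified sketch; the clean/dirty values and the card addresses are stand-ins, not the real G1 constants):

#include <cstdint>

inline void dirty_cards_for_range(uint8_t* card_for_start,
                                  uint8_t* card_for_last,  // inclusive
                                  bool use_cond_card_mark) {
  const uint8_t clean_card = 0xff;  // stand-in for G1CardTable::clean_card_val()
  const uint8_t dirty_card = 0x00;  // stand-in for G1CardTable::dirty_card_val()
  for (uint8_t* card = card_for_start; card <= card_for_last; ++card) {
    if (use_cond_card_mark && *card != clean_card) {
      continue;  // already marked; skip the store (the point of UseCondCardMark)
    }
    *card = dirty_card;
  }
}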
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@ -157,22 +157,6 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
}
}
static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
const Register thread, const Register value, const Register temp) {
// This code assumes that buffer index is pointer sized.
STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
// Can we store a value in the given thread's buffer?
// (The index field is typed as size_t.)
__ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address)
__ testptr(temp, temp); // index == 0?
__ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer)
// The buffer is not full, store value into it.
__ subptr(temp, wordSize); // temp := next index
__ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index
__ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index
__ movptr(Address(temp, 0), value); // *(buffer address + next index) := value
}
static void generate_pre_barrier_fast_path(MacroAssembler* masm,
const Register thread) {
Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
@ -190,21 +174,40 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm,
const Register pre_val,
const Register thread,
const Register tmp,
Label& done,
Label& runtime) {
Label& L_done) {
Address index_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
Address buffer_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
// This code assumes that buffer index is pointer sized.
STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
Label L_runtime;
// Do we need to load the previous value?
if (obj != noreg) {
__ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW);
}
// Is the previous value null?
__ cmpptr(pre_val, NULL_WORD);
__ jcc(Assembler::equal, done);
generate_queue_insertion(masm,
G1ThreadLocalData::satb_mark_queue_index_offset(),
G1ThreadLocalData::satb_mark_queue_buffer_offset(),
runtime,
thread, pre_val, tmp);
__ jmp(done);
__ testptr(pre_val, pre_val);
__ jcc(Assembler::equal, L_done);
// Can we store a value in the given thread's buffer?
// (The index field is typed as size_t.)
__ movptr(tmp, index_addr); // temp := *(index address)
__ testptr(tmp, tmp); // index == 0?
__ jccb(Assembler::zero, L_runtime); // jump to runtime if index == 0 (full buffer)
// The buffer is not full, store value into it.
__ subptr(tmp, wordSize); // temp := next index
__ movptr(index_addr, tmp); // *(index address) := next index
__ addptr(tmp, buffer_addr); // temp := buffer address + next index
__ movptr(Address(tmp, 0), pre_val); // *(buffer address + next index) := value
// Jump out if done, or fall through to the runtime call.
// "L_done" is far away, so jump cannot be short.
__ jmp(L_done);
__ bind(L_runtime);
}
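The slow path above now inlines the SATB queue insertion that previously lived in generate_queue_insertion. As a reading aid, a self-contained sketch of the same enqueue logic in plain C++; field names are illustrative and do not reflect the actual G1ThreadLocalData layout:

#include <cstddef>

// Thread-local SATB queue reduced to the two fields the barrier touches.
struct ToySatbQueue {
  size_t index;   // byte offset of the next free slot; 0 means "buffer full"
  void** buffer;  // base address of the thread-local buffer
};

// Returns true if the previous value was handled locally, false if the
// runtime must be called to flush the buffer and enqueue the value.
inline bool toy_satb_enqueue(ToySatbQueue* q, void* pre_val) {
  if (pre_val == nullptr) {
    return true;                 // null previous values are never recorded
  }
  if (q->index == 0) {
    return false;                // buffer full, take the runtime slow path
  }
  q->index -= sizeof(void*);     // claim the next slot (index counts down)
  void** slot = reinterpret_cast<void**>(
      reinterpret_cast<char*>(q->buffer) + q->index);
  *slot = pre_val;
  return true;
}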
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
@ -219,7 +222,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
const Register thread = r15_thread;
Label done;
Label runtime;
assert(pre_val != noreg, "check this code");
@ -231,9 +233,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
generate_pre_barrier_fast_path(masm, thread);
// If marking is not active (*(mark queue active address) == 0), jump to done
__ jcc(Assembler::equal, done);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);
__ bind(runtime);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done);
// Determine and save the live input values
__ push_call_clobbered_registers();
@ -272,23 +272,23 @@ static void generate_post_barrier(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register tmp1,
Label& done,
bool new_val_may_be_null) {
assert_different_registers(store_addr, new_val, tmp1, noreg);
Register thread = r15_thread;
Label L_done;
// Does store cross heap regions?
__ movptr(tmp1, store_addr); // tmp1 := store address
__ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
__ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
__ jcc(Assembler::equal, done);
__ jccb(Assembler::equal, L_done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ cmpptr(new_val, NULL_WORD); // new value == null?
__ jcc(Assembler::equal, done);
__ testptr(new_val, new_val); // new value == null?
__ jccb(Assembler::equal, L_done);
}
__ movptr(tmp1, store_addr); // tmp1 := store address
@ -298,20 +298,19 @@ static void generate_post_barrier(MacroAssembler* masm,
__ addptr(tmp1, card_table_addr); // tmp1 := card address
if (UseCondCardMark) {
__ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
__ jcc(Assembler::notEqual, done);
__ jccb(Assembler::notEqual, L_done);
}
// Storing a region crossing, non-null oop, card is clean.
// Dirty card.
__ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
__ bind(L_done);
}
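The filtering done by generate_post_barrier can be summarized in plain C++ as below. This is a sketch that assumes 4 MB regions; the real shift comes from G1HeapRegion::LogOfHRGrainBytes, and the subsequent card lookup and dirtying follow the same pattern as the array stub earlier:

#include <cstdint>

const int kLogRegionBytes = 22;  // assumption: 4 MB heap regions

// Returns true if the store needs a card mark at all.
inline bool toy_post_barrier_needs_card_mark(const void* store_addr,
                                             const void* new_val) {
  uintptr_t p = reinterpret_cast<uintptr_t>(store_addr);
  uintptr_t q = reinterpret_cast<uintptr_t>(new_val);
  if (((p ^ q) >> kLogRegionBytes) == 0) {
    return false;                // same region, never recorded
  }
  if (new_val == nullptr) {
    return false;                // null stores create no cross-region reference
  }
  return true;                   // cross-region, non-null: dirty the card
}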
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
__ bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp, true /* new_val_may_be_null */);
}
#if defined(COMPILER2)
@ -354,7 +353,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
G1PreBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register obj = stub->obj();
Register pre_val = stub->pre_val();
Register thread = stub->thread();
@ -362,9 +360,8 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
assert(stub->tmp2() == noreg, "not needed in this platform");
__ bind(*stub->entry());
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation());
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
__ jmp(*stub->continuation());
}
@ -374,9 +371,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register new_val,
Register tmp,
bool new_val_may_be_null) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
__ bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp, new_val_may_be_null);
}
#endif // COMPILER2
@ -449,7 +444,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
}
__ cmpptr(pre_val_reg, NULL_WORD);
__ testptr(pre_val_reg, pre_val_reg);
__ jcc(Assembler::equal, *stub->continuation());
ce->store_parameter(stub->pre_val()->as_register(), 0);
__ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
@ -465,9 +460,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2 /* unused on x86 */) {
Label done;
generate_post_barrier(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
masm->bind(done);
generate_post_barrier(masm, store_addr, new_val, tmp1, true /* new_val_may_be_null */);
}
#define __ sasm->
@ -490,8 +483,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
Label done;
Label runtime;
Label L_done, L_runtime;
// Is marking still active?
if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
@ -500,13 +492,13 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
__ cmpb(queue_active, 0);
}
__ jcc(Assembler::equal, done);
__ jcc(Assembler::equal, L_done);
// Can we store original value in the thread's buffer?
__ movptr(tmp, queue_index);
__ testptr(tmp, tmp);
__ jcc(Assembler::zero, runtime);
__ jccb(Assembler::zero, L_runtime);
__ subptr(tmp, wordSize);
__ movptr(queue_index, tmp);
__ addptr(tmp, buffer);
@ -514,9 +506,9 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
// prev_val (rax)
__ load_parameter(0, pre_val);
__ movptr(Address(tmp, 0), pre_val);
__ jmp(done);
__ jmp(L_done);
__ bind(runtime);
__ bind(L_runtime);
__ push_call_clobbered_registers();
@ -526,7 +518,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ pop_call_clobbered_registers();
__ bind(done);
__ bind(L_done);
__ pop_ppx(rdx);
__ pop_ppx(rax);

View File

@ -3524,10 +3524,10 @@ void StubGenerator::aesgcm_avx512(Register in, Register len, Register ct, Regist
false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);
ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
__ addl(pos, 16 * 16);
__ bind(MESG_BELOW_32_BLKS);
__ subl(len, 16 * 16);
__ addl(pos, 16 * 16);
gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);
__ bind(GHASH_DONE);
@ -4016,13 +4016,15 @@ void StubGenerator::aesgcm_avx2(Register in, Register len, Register ct, Register
const Register rounds = r10;
const XMMRegister ctr_blockx = xmm9;
const XMMRegister aad_hashx = xmm8;
Label encrypt_done, encrypt_by_8_new, encrypt_by_8;
Label encrypt_done, encrypt_by_8_new, encrypt_by_8, exit;
// This routine should be called only for message sizes of 128 bytes or more.
// Macro flow:
// process 8 16-byte blocks in initial_num_blocks.
// process 8 16-byte blocks at a time until all are done ('encrypt_by_8_new' followed by 'ghash_last_8').
__ xorl(pos, pos);
__ cmpl(len, 128);
__ jcc(Assembler::less, exit);
//Generate 8 constants for htbl
generateHtbl_8_block_avx2(subkeyHtbl);
@ -4090,6 +4092,7 @@ void StubGenerator::aesgcm_avx2(Register in, Register len, Register ct, Register
__ vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit);
__ vpxor(xmm13, xmm13, xmm13, Assembler::AVX_128bit);
__ bind(exit);
}
#undef __

View File

@ -2633,6 +2633,70 @@ bool Matcher::supports_vector_calling_convention(void) {
return EnableVectorSupport;
}
static bool is_ndd_demotable(const MachNode* mdef) {
return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
}
static bool is_ndd_demotable_commutative(const MachNode* mdef) {
return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
}
static bool is_demotion_candidate(const MachNode* mdef) {
return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
}
bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
int oper_index) {
if (mdef == nullptr) {
return false;
}
if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
mdef->in(mdef->operand_index(oper_index)) == nullptr) {
assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
return false;
}
// A complex memory operand covers multiple incoming edges needed for
// address computation. Biasing the def towards any address component will
// not result in NDD demotion by the assembler.
if (mdef->operand_num_edges(oper_index) != 1) {
assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
return false;
}
// Demotion candidate must be register mask compatible with definition.
const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
if (!oper_mask.overlap(mdef->out_RegMask())) {
assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
return false;
}
switch (oper_index) {
// The first operand of a MachNode matched by an Intel APX NDD selection
// pattern can share its assigned register with the definition operand if
// their live ranges do not overlap. In that case the assembler can demote
// the instruction to a legacy map0/map1 encoding, replacing its 4-byte
// extended EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates
// are decorated with a special flag by the instruction selector.
case 1:
return is_demotion_candidate(mdef);
// The definition operand of a commutative operation can also be biased
// towards the second operand.
case 2:
return is_ndd_demotable_commutative(mdef);
// The current scheme selects at most two biasing candidates.
default:
assert(false, "unhandled operand index: %s", mdef->Name());
break;
}
return false;
}
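To make the demotion described above concrete: an APX NDD form such as eaddl r10d, r10d, r11d (4-byte extended EVEX prefix) carries no extra information once the definition and the first source share a register, so the assembler may emit the legacy addl r10d, r11d (shorter REX/REX2 encoding) instead. A toy illustration of that check with hypothetical names; the real decision is split between the flags declared below and the assembler's encoder:

#include <cstdint>

enum ToyNodeFlags : uint64_t {
  Toy_ndd_demotable             = uint64_t(1) << 12,
  Toy_ndd_demotable_commutative = uint64_t(1) << 13,
};

struct ToyMachNode {
  uint64_t flags;
  int def_reg;    // register chosen for the definition
  int src1_reg;   // register chosen for the first source operand
};

// Demotion is possible once a flagged node has its definition biased onto
// the first source register.
inline bool toy_can_demote(const ToyMachNode& n) {
  const uint64_t mask = Toy_ndd_demotable | Toy_ndd_demotable_commutative;
  return (n.flags & mask) != 0 && n.def_reg == n.src1_reg;
}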
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
assert(EnableVectorSupport, "sanity");
int lo = XMM0_num;
@ -2812,7 +2876,7 @@ static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_byte
class Node::PD {
public:
enum NodeFlags {
enum NodeFlags : uint64_t {
Flag_intel_jcc_erratum = Node::_last_flag << 1,
Flag_sets_carry_flag = Node::_last_flag << 2,
Flag_sets_parity_flag = Node::_last_flag << 3,
@ -2824,7 +2888,9 @@ public:
Flag_clears_zero_flag = Node::_last_flag << 9,
Flag_clears_overflow_flag = Node::_last_flag << 10,
Flag_clears_sign_flag = Node::_last_flag << 11,
_last_flag = Flag_clears_sign_flag
Flag_ndd_demotable = Node::_last_flag << 12,
Flag_ndd_demotable_commutative = Node::_last_flag << 13,
_last_flag = Flag_ndd_demotable_commutative
};
};
@ -9801,7 +9867,7 @@ instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -9829,7 +9895,7 @@ instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -9872,7 +9938,7 @@ instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
@ -9929,6 +9995,7 @@ instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddI src val));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "eincl $dst, $src\t# int ndd" %}
ins_encode %{
@ -9983,6 +10050,7 @@ instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddI src val));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "edecl $dst, $src\t# int ndd" %}
ins_encode %{
@ -10089,7 +10157,7 @@ instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AddL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -10117,7 +10185,7 @@ instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -10160,7 +10228,7 @@ instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AddL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
@ -10216,6 +10284,7 @@ instruct incL_rReg_ndd(rRegL dst, rRegI src, immL1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddL src val));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "eincq $dst, $src\t# long ndd" %}
ins_encode %{
@ -10270,6 +10339,7 @@ instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
predicate(UseAPX && UseIncDec);
match(Set dst (AddL src val));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "edecq $dst, $src\t# long ndd" %}
ins_encode %{
@ -10984,7 +11054,7 @@ instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -10998,7 +11068,7 @@ instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -11041,7 +11111,7 @@ instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
@ -11099,7 +11169,7 @@ instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -11113,7 +11183,7 @@ instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -11156,7 +11226,7 @@ instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (SubL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
@ -11228,7 +11298,7 @@ instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubI zero src));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegl $dst, $src\t# int ndd" %}
ins_encode %{
@ -11256,7 +11326,7 @@ instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (NegI src));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegl $dst, $src\t# int ndd" %}
ins_encode %{
@ -11297,7 +11367,7 @@ instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (SubL zero src));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegq $dst, $src\t# long ndd" %}
ins_encode %{
@ -11325,7 +11395,7 @@ instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (NegL src));
effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
format %{ "enegq $dst, $src\t# long ndd" %}
ins_encode %{
@ -11370,6 +11440,7 @@ instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (MulI src1 src2));
effect(KILL cr);
flag(PD::Flag_ndd_demotable_commutative);
ins_cost(300);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@ -11411,6 +11482,7 @@ instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (MulI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
ins_cost(350);
format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@ -11462,6 +11534,7 @@ instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (MulL src1 src2));
effect(KILL cr);
flag(PD::Flag_ndd_demotable_commutative);
ins_cost(300);
format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
@ -11503,6 +11576,7 @@ instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (MulL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_ndd_demotable_commutative);
ins_cost(350);
format %{ "eimulq $dst, $src1, $src2 \t# long" %}
@ -11777,6 +11851,7 @@ instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftI src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
ins_encode %{
@ -11805,6 +11880,7 @@ instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftI src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
ins_encode %{
@ -11911,6 +11987,7 @@ instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (RShiftI src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
ins_encode %{
@ -12017,6 +12094,7 @@ instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (URShiftI src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
ins_encode %{
@ -12124,6 +12202,7 @@ instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftL src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@ -12152,6 +12231,7 @@ instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (LShiftL src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@ -12258,6 +12338,7 @@ instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (RShiftL src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@ -12364,6 +12445,7 @@ instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (URShiftL src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
ins_encode %{
@ -12535,6 +12617,7 @@ instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
match(Set dst (RotateLeft src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
ins_encode %{
@ -12599,6 +12682,7 @@ instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
match(Set dst (RotateRight src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
ins_encode %{
@ -12651,6 +12735,7 @@ instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateLeft dst shift));
effect(KILL cr);
format %{ "rolq $dst, $shift" %}
ins_encode %{
__ rolq($dst$$Register);
@ -12664,6 +12749,7 @@ instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateLeft src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
ins_encode %{
@ -12728,6 +12814,7 @@ instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
match(Set dst (RotateRight src shift));
effect(KILL cr);
flag(PD::Flag_ndd_demotable);
format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
ins_encode %{
@ -12805,7 +12892,7 @@ instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -12898,7 +12985,7 @@ instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -12942,7 +13029,7 @@ instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
@ -13142,7 +13229,7 @@ instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -13171,7 +13258,7 @@ instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -13185,7 +13272,7 @@ instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
ins_encode %{
@ -13229,7 +13316,7 @@ instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
@ -13305,7 +13392,7 @@ instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (XorI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -13331,6 +13418,7 @@ instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
match(Set dst (XorI src imm));
predicate(UseAPX);
flag(PD::Flag_ndd_demotable);
format %{ "enotl $dst, $src" %}
ins_encode %{
@ -13361,7 +13449,7 @@ instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
match(Set dst (XorI src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
ins_encode %{
@ -13407,7 +13495,7 @@ instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (XorI src1 (LoadI src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
ins_cost(150);
format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
@ -13486,7 +13574,7 @@ instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (AndL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -13542,7 +13630,7 @@ instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -13586,7 +13674,7 @@ instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (AndL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
@ -13789,7 +13877,7 @@ instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -13844,7 +13932,7 @@ instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -13858,7 +13946,7 @@ instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
ins_encode %{
@ -13903,7 +13991,7 @@ instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (OrL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
@ -13982,7 +14070,7 @@ instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
predicate(UseAPX);
match(Set dst (XorL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -14008,6 +14096,7 @@ instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
predicate(UseAPX);
match(Set dst (XorL src imm));
flag(PD::Flag_ndd_demotable);
format %{ "enotq $dst, $src" %}
ins_encode %{
@ -14038,7 +14127,7 @@ instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
match(Set dst (XorL src1 src2));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
ins_encode %{
@ -14084,7 +14173,7 @@ instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
predicate(UseAPX);
match(Set dst (XorL src1 (LoadL src2)));
effect(KILL cr);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
ins_cost(150);
format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
@ -16539,6 +16628,7 @@ instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
predicate(UseAPX);
match(Set dst (MinI src1 src2));
effect(DEF dst, USE src1, USE src2);
flag(PD::Flag_ndd_demotable);
ins_cost(200);
expand %{
@ -16590,6 +16680,7 @@ instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
predicate(UseAPX);
match(Set dst (MaxI src1 src2));
effect(DEF dst, USE src1, USE src2);
flag(PD::Flag_ndd_demotable);
ins_cost(200);
expand %{

View File

@ -1038,6 +1038,8 @@ static void* dll_load_library(const char *filename, int *eno, char *ebuf, int eb
dflags |= RTLD_MEMBER;
}
Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);
void* result;
const char* error_report = nullptr;
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)

View File

@ -1035,6 +1035,8 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebu
int rtn = fegetenv(&default_fenv);
assert(rtn == 0, "fegetenv must succeed");
Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);
void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
result = ::dlopen(filename, RTLD_LAZY);

View File

@ -159,9 +159,7 @@ physical_memory_size_type os::Linux::_physical_memory = 0;
address os::Linux::_initial_thread_stack_bottom = nullptr;
uintptr_t os::Linux::_initial_thread_stack_size = 0;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = nullptr;
pthread_t os::Linux::_main_thread;
bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_libc_version = nullptr;
const char * os::Linux::_libpthread_version = nullptr;
@ -1475,29 +1473,6 @@ void os::Linux::capture_initial_stack(size_t max_size) {
////////////////////////////////////////////////////////////////////////////////
// time support
void os::Linux::fast_thread_clock_init() {
clockid_t clockid;
struct timespec tp;
int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
(int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
// Switch to using fast clocks for thread cpu time if
// the clock_getres() returns 0 error code.
// Note, that some kernels may support the current thread
// clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
// returned by the pthread_getcpuclockid().
// If the fast POSIX clocks are supported then the clock_getres()
// must return at least tp.tv_sec == 0 which means a resolution
// better than 1 sec. This is extra check for reliability.
if (pthread_getcpuclockid_func &&
pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
_supports_fast_thread_cpu_time = true;
_pthread_getcpuclockid = pthread_getcpuclockid_func;
}
}
// thread_id is kernel thread id (similar to Solaris LWP id)
intx os::current_thread_id() { return os::Linux::gettid(); }
int os::current_process_id() {
@ -1900,6 +1875,8 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
assert(rtn == 0, "fegetenv must succeed");
#endif // IA32
Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);
void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
result = ::dlopen(filename, RTLD_LAZY);
@ -4328,7 +4305,7 @@ OSReturn os::get_native_priority(const Thread* const thread,
// For reference, please, see IEEE Std 1003.1-2004:
// http://www.unix.org/single_unix_specification
jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
jlong os::Linux::total_thread_cpu_time(clockid_t clockid) {
struct timespec tp;
int status = clock_gettime(clockid, &tp);
assert(status == 0, "clock_gettime error: %s", os::strerror(errno));
@ -4556,8 +4533,6 @@ jint os::init_2(void) {
os::Posix::init_2();
Linux::fast_thread_clock_init();
if (PosixSignals::init() == JNI_ERR) {
return JNI_ERR;
}
@ -4985,14 +4960,14 @@ int os::open(const char *path, int oflag, int mode) {
return fd;
}
static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
static jlong user_thread_cpu_time(Thread *thread);
static jlong fast_cpu_time(Thread *thread) {
static jlong total_thread_cpu_time(Thread *thread) {
clockid_t clockid;
int rc = os::Linux::pthread_getcpuclockid(thread->osthread()->pthread_id(),
int rc = pthread_getcpuclockid(thread->osthread()->pthread_id(),
&clockid);
if (rc == 0) {
return os::Linux::fast_thread_cpu_time(clockid);
return os::Linux::total_thread_cpu_time(clockid);
} else {
// It's possible to encounter a terminated native thread that failed
// to detach itself from the VM - which should result in ESRCH.
@ -5009,41 +4984,31 @@ static jlong fast_cpu_time(Thread *thread) {
// the fast estimate available on the platform.
jlong os::current_thread_cpu_time() {
if (os::Linux::supports_fast_thread_cpu_time()) {
return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
} else {
// return user + sys since the cost is the same
return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
}
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
}
jlong os::thread_cpu_time(Thread* thread) {
// consistent with what current_thread_cpu_time() returns
if (os::Linux::supports_fast_thread_cpu_time()) {
return fast_cpu_time(thread);
} else {
return slow_thread_cpu_time(thread, true /* user + sys */);
}
return total_thread_cpu_time(thread);
}
jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
if (user_sys_cpu_time) {
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
} else {
return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
return user_thread_cpu_time(Thread::current());
}
}
jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
return fast_cpu_time(thread);
if (user_sys_cpu_time) {
return total_thread_cpu_time(thread);
} else {
return slow_thread_cpu_time(thread, user_sys_cpu_time);
return user_thread_cpu_time(thread);
}
}
// -1 on error.
static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
static jlong user_thread_cpu_time(Thread *thread) {
pid_t tid = thread->osthread()->thread_id();
char *s;
char stat[2048];
@ -5080,11 +5045,8 @@ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
&ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
&user_time, &sys_time);
if (count != 13) return -1;
if (user_sys_cpu_time) {
return ((jlong)sys_time + (jlong)user_time) * (1000000000 / os::Posix::clock_tics_per_second());
} else {
return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
}
return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
}
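The refactoring above removes the fast-clock capability probe and queries the per-thread CPU clock directly. A standalone sketch of that query, using only the POSIX calls the new code relies on:

#include <pthread.h>
#include <stdint.h>
#include <time.h>

// Returns the thread's combined user+system CPU time in nanoseconds,
// or -1 on error (e.g. ESRCH for a thread that has already terminated).
inline int64_t total_thread_cpu_time_ns(pthread_t tid) {
  clockid_t cid;
  if (pthread_getcpuclockid(tid, &cid) != 0) {
    return -1;
  }
  struct timespec tp;
  if (clock_gettime(cid, &tp) != 0) {
    return -1;
  }
  return (int64_t)tp.tv_sec * 1000000000LL + tp.tv_nsec;
}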
void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
@ -5163,7 +5125,7 @@ int os::get_core_path(char* buffer, size_t bufferSize) {
if (core_pattern[0] == '|') {
written = jio_snprintf(buffer, bufferSize,
"\"%s\" (or dumping to %s/core.%d)",
"\"%s\" (alternatively, falling back to %s/core.%d)",
&core_pattern[1], p, current_process_id());
} else if (pid_pos != nullptr) {
*pid_pos = '\0';

View File

@ -32,16 +32,12 @@
class os::Linux {
friend class os;
static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
static address _initial_thread_stack_bottom;
static uintptr_t _initial_thread_stack_size;
static const char *_libc_version;
static const char *_libpthread_version;
static bool _supports_fast_thread_cpu_time;
static GrowableArray<int>* _cpu_to_node;
static GrowableArray<int>* _nindex_to_node;
@ -146,18 +142,7 @@ class os::Linux {
static bool manually_expand_stack(JavaThread * t, address addr);
static void expand_stack_to(address bottom);
// fast POSIX clocks support
static void fast_thread_clock_init(void);
static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
}
static bool supports_fast_thread_cpu_time() {
return _supports_fast_thread_cpu_time;
}
static jlong fast_thread_cpu_time(clockid_t clockid);
static jlong total_thread_cpu_time(clockid_t clockid);
static jlong sendfile(int out_fd, int in_fd, jlong* offset, jlong count);

View File

@ -50,7 +50,14 @@ ProcSmapsParser::~ProcSmapsParser() {
bool ProcSmapsParser::read_line() {
_line[0] = '\0';
return ::fgets(_line, _linelen, _f) != nullptr;
if (::fgets(_line, _linelen, _f) == nullptr) {
// On error or EOF, ensure deterministic empty buffer
_line[0] = '\0';
return false;
} else {
return true;
}
}
bool ProcSmapsParser::is_header_line() {
@ -101,8 +108,6 @@ void ProcSmapsParser::scan_additional_line(ProcSmapsInfo& out) {
}
}
// Starts or continues parsing. Returns true on success,
// false on EOF or on error.
bool ProcSmapsParser::parse_next(ProcSmapsInfo& out) {
// Information about a single mapping spans several lines.
@ -117,15 +122,13 @@ bool ProcSmapsParser::parse_next(ProcSmapsInfo& out) {
assert(is_header_line(), "Not a header line: \"%s\".", _line);
scan_header_line(out);
// Now read until we encounter the next header line or EOF or an error.
bool ok = false, stop = false;
do {
ok = read_line();
stop = !ok || is_header_line();
if (!stop) {
scan_additional_line(out);
while (true) {
bool ok = read_line();
if (!ok || is_header_line()) {
break; // EOF or next header
}
} while (!stop);
scan_additional_line(out);
}
return ok;
return true; // always return true if a mapping was parsed
}
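The rewritten parse_next() consumes one header line, absorbs detail lines until the next header or EOF, and returns true whenever a mapping was parsed. A toy parser with the same shape, using a simplified header test and a hypothetical record format:

#include <cstdio>

struct ToyMappingParser {
  FILE* f;
  char line[512];

  explicit ToyMappingParser(FILE* file) : f(file) { line[0] = '\0'; }

  bool read_line() {
    if (std::fgets(line, sizeof(line), f) == nullptr) {
      line[0] = '\0';              // deterministic empty buffer on EOF/error
      return false;
    }
    return true;
  }

  // Assumption for this sketch: detail lines are indented, headers are not.
  bool is_header_line() const { return line[0] != '\0' && line[0] != ' '; }

  // Returns true iff one record (header plus trailing detail lines) was read.
  bool parse_next() {
    if (line[0] == '\0' && !read_line()) {
      return false;                // EOF before any header
    }
    // scan_header_line(out) would run here.
    while (read_line() && !is_header_line()) {
      // scan_additional_line(out) would run here.
    }
    return true;
  }
};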

View File

@ -84,8 +84,7 @@ public:
ProcSmapsParser(FILE* f);
~ProcSmapsParser();
// Starts or continues parsing. Returns true on success,
// false on EOF or on error.
// Starts or continues parsing. Returns true iff a mapping was parsed.
bool parse_next(ProcSmapsInfo& out);
};

View File

@ -108,41 +108,60 @@ size_t os::_os_min_stack_allowed = PTHREAD_STACK_MIN;
// Check core dump limit and report possible place where core can be found
void os::check_core_dump_prerequisites(char* buffer, size_t bufferSize, bool check_only) {
stringStream buf(buffer, bufferSize);
if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) {
jio_snprintf(buffer, bufferSize, "CreateCoredumpOnCrash is disabled from command line");
VMError::record_coredump_status(buffer, false);
buf.print("CreateCoredumpOnCrash is disabled from command line");
VMError::record_coredump_status(buf.freeze(), false);
} else {
struct rlimit rlim;
bool success = true;
bool warn = true;
char core_path[PATH_MAX];
if (get_core_path(core_path, PATH_MAX) <= 0) {
jio_snprintf(buffer, bufferSize, "core.%d (may not exist)", current_process_id());
// In the warning message, let the user know.
if (check_only) {
buf.print("the core path couldn't be determined. It commonly defaults to ");
}
buf.print("core.%d%s", current_process_id(), check_only ? "" : " (may not exist)");
#ifdef LINUX
} else if (core_path[0] == '"') { // redirect to user process
jio_snprintf(buffer, bufferSize, "Core dumps may be processed with %s", core_path);
if (check_only) {
buf.print("core dumps may be further processed by the following: ");
} else {
buf.print("Determined by the following: ");
}
buf.print("%s", core_path);
#endif
} else if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
jio_snprintf(buffer, bufferSize, "%s (may not exist)", core_path);
if (check_only) {
buf.print("the rlimit couldn't be determined. If resource limits permit, the core dump will be located at ");
}
buf.print("%s%s", core_path, check_only ? "" : " (may not exist)");
} else {
switch(rlim.rlim_cur) {
case RLIM_INFINITY:
jio_snprintf(buffer, bufferSize, "%s", core_path);
buf.print("%s", core_path);
warn = false;
break;
case 0:
jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
buf.print("%s dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again", check_only ? "core" : "Core");
success = false;
break;
default:
jio_snprintf(buffer, bufferSize, "%s (max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", core_path, uint64_t(rlim.rlim_cur) / K);
if (check_only) {
buf.print("core dumps are constrained ");
} else {
buf.print( "%s ", core_path);
}
buf.print( "(max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", uint64_t(rlim.rlim_cur) / K);
break;
}
}
const char* result = buf.freeze();
if (!check_only) {
VMError::record_coredump_status(buffer, success);
VMError::record_coredump_status(result, success);
} else if (warn) {
warning("CreateCoredumpOnCrash specified, but %s", buffer);
warning("CreateCoredumpOnCrash specified, but %s", result);
}
}
}
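The message selection above is driven by the RLIMIT_CORE soft limit. A minimal POSIX sketch of that probe, independent of the stringStream plumbing:

#include <sys/resource.h>
#include <cstdio>

// Mirrors the three cases handled above: unlimited, disabled, or truncated.
inline void report_core_limit() {
  struct rlimit rl;
  if (getrlimit(RLIMIT_CORE, &rl) != 0) {
    std::puts("core limit could not be determined");
  } else if (rl.rlim_cur == RLIM_INFINITY) {
    std::puts("full core dumps are enabled");
  } else if (rl.rlim_cur == 0) {
    std::puts("core dumps are disabled; try \"ulimit -c unlimited\"");
  } else {
    std::printf("core dumps are truncated to %llu kB\n",
                (unsigned long long)(rl.rlim_cur / 1024));
  }
}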

View File

@ -1715,6 +1715,8 @@ static int _print_module(const char* fname, address base_address,
// same architecture as Hotspot is running on
void * os::dll_load(const char *name, char *ebuf, int ebuflen) {
log_info(os)("attempting shared library load of %s", name);
Events::log_dll_message(nullptr, "Attempting to load shared library %s", name);
void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(name, &result);)
result = LoadLibrary(name);

View File

@ -50,11 +50,9 @@ double SharedRuntime::fmod_winx64(double x, double y)
hx ^= sx; /* |x| */
hy &= 0x7fffffff; /* |y| */
#pragma warning( disable : 4146 )
/* purge off exception values */
if ((hy | ly) == 0 || (hx >= 0x7ff00000) || /* y=0,or x not finite */
((hy | ((ly | -ly) >> 31))>0x7ff00000)) /* or y is NaN */
#pragma warning( default : 4146 )
((hy | ((ly | -ly) >> 31))>0x7ff00000)) /* or y is NaN */
return (x*y) / (x*y);
if (hx <= hy) {
if ((hx<hy) || (lx<ly)) return x; /* |x|<|y| return x */

View File

@ -52,12 +52,16 @@ struct AtomicAccess::PlatformAdd {
}
};
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<size_t byte_size>
template<typename T>
inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(byte_size == sizeof(T));
STATIC_ASSERT(byte_size == 4 || byte_size == 8);
T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
FULL_MEM_BARRIER;
return res;
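This file, and the platform files that follow, derive the new PlatformXchg<1> specialization from XchgUsingCmpxchg<1>, i.e. a compare-and-swap retry loop. A hedged sketch of that fallback using compiler builtins rather than the actual HotSpot helper:

#include <cstdint>

// A 1-byte exchange built from compare-and-swap.
inline uint8_t toy_xchg_using_cas(volatile uint8_t* dest, uint8_t new_val) {
  uint8_t expected = __atomic_load_n(dest, __ATOMIC_RELAXED);
  // A failed CAS refreshes 'expected' with the current value, so just retry.
  while (!__atomic_compare_exchange_n(dest, &expected, new_val,
                                      /* weak */ true,
                                      __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
  }
  return expected;
}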

View File

@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -66,6 +66,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -113,6 +113,9 @@ inline D AtomicAccess::PlatformAdd<8>::fetch_then_add(D volatile* dest, I add_va
return atomic_fastcall(stub, dest, add_value);
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -118,6 +118,8 @@ inline D AtomicAccess::PlatformAdd<4>::add_then_fetch(D volatile* dest, I add_va
return add_using_helper<int32_t>(ARMAtomicFuncs::_add_func, dest, add_value);
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>

View File

@ -152,6 +152,9 @@ inline T AtomicAccess::PlatformCmpxchg<4>::operator()(T volatile* dest __attribu
}
#endif
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<size_t byte_size>
template<typename T>
inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
@ -164,6 +167,7 @@ inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
#endif
STATIC_ASSERT(byte_size == sizeof(T));
STATIC_ASSERT(byte_size == 4 || byte_size == 8);
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;

View File

@ -209,6 +209,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I inc,
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -65,6 +65,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

View File

@ -68,6 +68,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
#undef DEFINE_INTRINSIC_ADD
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template<typename T> \
@ -75,6 +78,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast<T>( \
IntrinsicName(reinterpret_cast<IntrinsicType volatile *>(dest), \
PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \

View File

@ -70,6 +70,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
#undef DEFINE_INTRINSIC_ADD
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};
#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template<typename T> \
@ -77,6 +80,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast<T>( \
IntrinsicName(reinterpret_cast<IntrinsicType volatile *>(dest), \
PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \

View File

@ -73,7 +73,7 @@ void ResolutionErrorTable::add_entry(const constantPoolHandle& pool, int cp_inde
ResolutionErrorKey key(pool(), cp_index);
ResolutionErrorEntry *entry = new ResolutionErrorEntry(error, message, cause, cause_msg);
_resolution_error_table->put(key, entry);
_resolution_error_table->put_when_absent(key, entry);
}
// create new nest host error entry
@ -85,7 +85,7 @@ void ResolutionErrorTable::add_entry(const constantPoolHandle& pool, int cp_inde
ResolutionErrorKey key(pool(), cp_index);
ResolutionErrorEntry *entry = new ResolutionErrorEntry(message);
_resolution_error_table->put(key, entry);
_resolution_error_table->put_when_absent(key, entry);
}
// find entry in the table
@ -126,6 +126,15 @@ ResolutionErrorEntry::~ResolutionErrorEntry() {
}
}
void ResolutionErrorEntry::set_nest_host_error(const char* message) {
// If a message is already set, free it.
if (nest_host_error() != nullptr) {
FREE_C_HEAP_ARRAY(char, _nest_host_error);
}
_nest_host_error = message;
}
class ResolutionErrorDeleteIterate : StackObj {
ConstantPool* p;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -91,10 +91,7 @@ class ResolutionErrorEntry : public CHeapObj<mtClass> {
~ResolutionErrorEntry();
// The incoming nest host error message is already in the C-Heap.
void set_nest_host_error(const char* message) {
_nest_host_error = message;
}
void set_nest_host_error(const char* message);
Symbol* error() const { return _error; }
const char* message() const { return _message; }

View File

@ -1864,14 +1864,19 @@ void SystemDictionary::add_nest_host_error(const constantPoolHandle& pool,
{
MutexLocker ml(Thread::current(), SystemDictionary_lock);
ResolutionErrorEntry* entry = ResolutionErrorTable::find_entry(pool, which);
if (entry != nullptr && entry->nest_host_error() == nullptr) {
if (entry == nullptr) {
// Only add a new entry to the resolution error table if one hasn't been found for this
// constant pool index. In this case resolution succeeded but there's an error in this nest host
// that we use the table to record.
assert(pool->resolved_klass_at(which) != nullptr, "klass should be resolved if there is no entry");
ResolutionErrorTable::add_entry(pool, which, message);
} else {
// An existing entry means we had a true resolution failure (LinkageError) with our nest host, but we
// still want to add the error message for the higher-level access checks to report. We should
// only reach here under the same error condition, so we can ignore the potential race with setting
// the message. If we see it is already set then we can ignore it.
// the message, and set it again.
assert(entry->nest_host_error() == nullptr || strcmp(entry->nest_host_error(), message) == 0, "should be the same message");
entry->set_nest_host_error(message);
} else {
ResolutionErrorTable::add_entry(pool, which, message);
}
}
}
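The control flow above is an insert-or-update under a single lock: create a table entry only when the nest host actually failed to resolve earlier, otherwise refresh the message on the existing entry. A hedged sketch with std::map standing in for the resolution error table (key and value types are made up):

#include <map>
#include <mutex>
#include <string>

// Illustrative insert-or-update sketch; not the HotSpot table API.
std::mutex table_lock;
std::map<int, std::string> nest_host_errors;

void record_nest_host_error(int cp_index, const std::string& message) {
  std::lock_guard<std::mutex> guard(table_lock);
  auto it = nest_host_errors.find(cp_index);
  if (it == nest_host_errors.end()) {
    nest_host_errors.emplace(cp_index, message);  // no prior resolution failure
  } else {
    it->second = message;                         // existing entry: refresh message
  }
}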

View File

@ -1010,8 +1010,10 @@ void CompilationMemoryStatistic::print_error_report(outputStream* st) {
oom_stats->print_peak_state_on(st);
st->cr();
}
st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
print_all_by_size(st, false, false, 0, 10);
if (Thread::current_or_null_safe() != nullptr) {
st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
print_all_by_size(st, false, false, 0, 10);
}
}
void CompilationMemoryStatistic::print_final_report(outputStream* st) {

View File

@ -33,10 +33,10 @@
#include "utilities/align.hpp"
G1CollectedHeap* G1AllocRegion::_g1h = nullptr;
G1HeapRegion* G1AllocRegion::_dummy_region = nullptr;
Atomic<G1HeapRegion*> G1AllocRegion::_dummy_region;
void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
assert(_dummy_region == nullptr, "should be set once");
assert(_dummy_region.load_relaxed() == nullptr, "should be set once");
assert(dummy_region != nullptr, "pre-condition");
assert(dummy_region->free() == 0, "pre-condition");
@ -46,11 +46,11 @@ void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
assert(dummy_region->par_allocate(1, 1, &assert_tmp) == nullptr, "should fail");
_g1h = g1h;
_dummy_region = dummy_region;
_dummy_region.release_store(dummy_region);
}
size_t G1AllocRegion::fill_up_remaining_space(G1HeapRegion* alloc_region) {
assert(alloc_region != nullptr && alloc_region != _dummy_region,
assert(alloc_region != nullptr && alloc_region != _dummy_region.load_relaxed(),
"pre-condition");
size_t result = 0;
@ -111,13 +111,13 @@ size_t G1AllocRegion::retire_internal(G1HeapRegion* alloc_region, bool fill_up)
}
size_t G1AllocRegion::retire(bool fill_up) {
assert_alloc_region(_alloc_region != nullptr, "not initialized properly");
assert_alloc_region(_alloc_region.load_relaxed() != nullptr, "not initialized properly");
size_t waste = 0;
trace("retiring");
G1HeapRegion* alloc_region = _alloc_region;
if (alloc_region != _dummy_region) {
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
if (alloc_region != _dummy_region.load_relaxed()) {
waste = retire_internal(alloc_region, fill_up);
reset_alloc_region();
}
@ -127,7 +127,7 @@ size_t G1AllocRegion::retire(bool fill_up) {
}
HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
assert_alloc_region(_alloc_region == _dummy_region, "pre-condition");
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "pre-condition");
trace("attempting region allocation");
G1HeapRegion* new_alloc_region = allocate_new_region(word_size);
@ -138,7 +138,6 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
HeapWord* result = new_alloc_region->allocate(word_size);
assert_alloc_region(result != nullptr, "the allocation should have succeeded");
OrderAccess::storestore();
// Note that we first perform the allocation and then we store the
// region in _alloc_region. This is the reason why an active region
// can never be empty.
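With the switch from OrderAccess::storestore() to release_store, the guarantee is the usual release/acquire publication pattern: initialize first, publish second, and acquiring readers see the initialization. A minimal stand-alone sketch using std::atomic (types and names are illustrative, not the HotSpot Atomic<> wrapper):

#include <atomic>

struct Region { int free_words; };

std::atomic<Region*> g_alloc_region{nullptr};

void publish(Region* r) {
  r->free_words = 128;                                 // initialize the region first
  g_alloc_region.store(r, std::memory_order_release);  // then publish it
}

Region* reader() {
  Region* r = g_alloc_region.load(std::memory_order_acquire);
  // If r is non-null, the initialization above is guaranteed to be visible.
  return r;
}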
@ -154,16 +153,16 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
void G1AllocRegion::init() {
trace("initializing");
assert_alloc_region(_alloc_region == nullptr, "pre-condition");
assert_alloc_region(_dummy_region != nullptr, "should have been set");
_alloc_region = _dummy_region;
assert_alloc_region(_alloc_region.load_relaxed() == nullptr, "pre-condition");
assert_alloc_region(_dummy_region.load_relaxed() != nullptr, "should have been set");
_alloc_region.release_store(_dummy_region.load_relaxed());
_count = 0;
trace("initialized");
}
void G1AllocRegion::set(G1HeapRegion* alloc_region) {
trace("setting");
assert_alloc_region(_alloc_region == _dummy_region && _count == 0, "pre-condition");
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed() && _count == 0, "pre-condition");
update_alloc_region(alloc_region);
trace("set");
@ -175,19 +174,19 @@ void G1AllocRegion::update_alloc_region(G1HeapRegion* alloc_region) {
// maintain the "the alloc region cannot be empty" invariant.
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "pre-condition");
_alloc_region = alloc_region;
_alloc_region.release_store(alloc_region);
_count += 1;
trace("updated");
}
G1HeapRegion* G1AllocRegion::release() {
trace("releasing");
G1HeapRegion* alloc_region = _alloc_region;
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
retire(false /* fill_up */);
assert_alloc_region(_alloc_region == _dummy_region, "post-condition of retire()");
_alloc_region = nullptr;
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "post-condition of retire()");
_alloc_region.store_relaxed(nullptr);
trace("released");
return (alloc_region == _dummy_region) ? nullptr : alloc_region;
return (alloc_region == _dummy_region.load_relaxed()) ? nullptr : alloc_region;
}
#ifndef PRODUCT
@ -211,12 +210,13 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
out->print("%s: %u ", _name, _count);
if (_alloc_region == nullptr) {
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
if (alloc_region == nullptr) {
out->print("null");
} else if (_alloc_region == _dummy_region) {
} else if (alloc_region == _dummy_region.load_relaxed()) {
out->print("DUMMY");
} else {
out->print(HR_FORMAT, HR_FORMAT_PARAMS(_alloc_region));
out->print(HR_FORMAT, HR_FORMAT_PARAMS(alloc_region));
}
out->print(" : %s", str);
@ -235,7 +235,7 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
#endif // PRODUCT
G1AllocRegion::G1AllocRegion(const char* name, uint node_index)
: _alloc_region(nullptr),
: _alloc_region(),
_count(0),
_name(name),
_node_index(node_index)
@ -250,7 +250,7 @@ void MutatorAllocRegion::retire_region(G1HeapRegion* alloc_region) {
}
void MutatorAllocRegion::init() {
assert(_retained_alloc_region == nullptr, "Pre-condition");
assert(_retained_alloc_region.load_relaxed() == nullptr, "Pre-condition");
G1AllocRegion::init();
_wasted_bytes = 0;
}
@ -261,8 +261,9 @@ bool MutatorAllocRegion::should_retain(G1HeapRegion* region) {
return false;
}
if (_retained_alloc_region != nullptr &&
free_bytes < _retained_alloc_region->free()) {
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
if (retained_alloc_region != nullptr &&
free_bytes < retained_alloc_region->free()) {
return false;
}
@ -278,10 +279,11 @@ size_t MutatorAllocRegion::retire(bool fill_up) {
// free than the currently retained region.
if (should_retain(current_region)) {
trace("mutator retained");
if (_retained_alloc_region != nullptr) {
waste = retire_internal(_retained_alloc_region, true);
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
if (retained_alloc_region != nullptr) {
waste = retire_internal(retained_alloc_region, true);
}
_retained_alloc_region = current_region;
_retained_alloc_region.release_store(current_region);
} else {
waste = retire_internal(current_region, fill_up);
}
@ -300,7 +302,7 @@ size_t MutatorAllocRegion::used_in_alloc_regions() {
used += hr->used();
}
hr = _retained_alloc_region;
hr = _retained_alloc_region.load_acquire();
if (hr != nullptr) {
used += hr->used();
}
@ -313,9 +315,10 @@ G1HeapRegion* MutatorAllocRegion::release() {
// The retained alloc region must be retired and this must be
// done after the above call to release the mutator alloc region,
// since it might update the _retained_alloc_region member.
if (_retained_alloc_region != nullptr) {
_wasted_bytes += retire_internal(_retained_alloc_region, false);
_retained_alloc_region = nullptr;
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
if (retained_alloc_region != nullptr) {
_wasted_bytes += retire_internal(retained_alloc_region, false);
_retained_alloc_region.store_relaxed(nullptr);
}
log_debug(gc, alloc, region)("Mutator Allocation stats, regions: %u, wasted size: %zu%s (%4.1f%%)",
count(),

View File

@ -29,6 +29,7 @@
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1HeapRegionAttr.hpp"
#include "gc/g1/g1NUMA.hpp"
#include "runtime/atomic.hpp"
class G1CollectedHeap;
@ -40,8 +41,6 @@ class G1CollectedHeap;
// replaced.
class G1AllocRegion : public CHeapObj<mtGC> {
private:
// The active allocating region we are currently allocating out
// of. The invariant is that if this object is initialized (i.e.,
// init() has been called and release() has not) then _alloc_region
@ -52,7 +51,7 @@ private:
// then _alloc_region is null and this object should not be used to
// satisfy allocation requests (it was done this way to force the
// correct use of init() and release()).
G1HeapRegion* volatile _alloc_region;
Atomic<G1HeapRegion*> _alloc_region;
// It keeps track of the distinct number of regions that are used
// for allocation in the active interval of this object, i.e.,
@ -71,7 +70,7 @@ private:
// == end()). When we don't have a valid active region we make
// _alloc_region point to this. This allows us to skip checking
// whether the _alloc_region is null or not.
static G1HeapRegion* _dummy_region;
static Atomic<G1HeapRegion*> _dummy_region;
// After a region is allocated by alloc_new_region, this
// method is used to set it as the active alloc_region
@ -124,9 +123,9 @@ public:
static void setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region);
G1HeapRegion* get() const {
G1HeapRegion * hr = _alloc_region;
G1HeapRegion * hr = _alloc_region.load_acquire();
// Make sure that the dummy region does not escape this class.
return (hr == _dummy_region) ? nullptr : hr;
return (hr == _dummy_region.load_relaxed()) ? nullptr : hr;
}
uint count() { return _count; }
@ -177,7 +176,7 @@ private:
// Retained allocation region. Used to lower the waste generated
// during mutation by having two active regions if the free space
// in a region about to be retired still could fit a TLAB.
G1HeapRegion* volatile _retained_alloc_region;
Atomic<G1HeapRegion*> _retained_alloc_region;
// Decide if the region should be retained, based on the free size
// in it and the free size in the currently retained region, if any.

View File

@ -32,13 +32,13 @@
#define assert_alloc_region(p, message) \
do { \
assert((p), "[%s] %s c: %u r: " PTR_FORMAT, \
_name, (message), _count, p2i(_alloc_region) \
_name, (message), _count, p2i(_alloc_region.load_relaxed()) \
); \
} while (0)
inline void G1AllocRegion::reset_alloc_region() {
_alloc_region = _dummy_region;
_alloc_region.store_relaxed(_dummy_region.load_relaxed());
}
inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t word_size) {
@ -51,7 +51,7 @@ inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t
inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
G1HeapRegion* alloc_region = _alloc_region;
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "not initialized properly");
HeapWord* result = alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
@ -97,8 +97,9 @@ inline HeapWord* G1AllocRegion::attempt_allocation_using_new_region(size_t min_w
inline HeapWord* MutatorAllocRegion::attempt_retained_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
if (_retained_alloc_region != nullptr) {
HeapWord* result = _retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
if (retained_alloc_region != nullptr) {
HeapWord* result = retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
if (result != nullptr) {
trace("alloc retained", min_word_size, desired_word_size, *actual_word_size, result);
return result;

View File

@ -77,10 +77,11 @@ void G1Arguments::initialize_alignments() {
}
size_t G1Arguments::conservative_max_heap_alignment() {
if (FLAG_IS_DEFAULT(G1HeapRegionSize)) {
return G1HeapRegion::max_ergonomics_size();
}
return G1HeapRegion::max_region_size();
const size_t region_size = FLAG_IS_DEFAULT(G1HeapRegionSize)
? G1HeapRegion::max_ergonomics_size()
: G1HeapRegion::max_region_size();
return calculate_heap_alignment(region_size);
}
void G1Arguments::initialize_verification_types() {

View File

@ -2355,7 +2355,8 @@ static void print_region_type(outputStream* st, const char* type, uint count, bo
}
void G1CollectedHeap::print_heap_on(outputStream* st) const {
size_t heap_used = Heap_lock->owned_by_self() ? used() : used_unlocked();
size_t heap_used = (Thread::current_or_null_safe() != nullptr &&
Heap_lock->owned_by_self()) ? used() : used_unlocked();
st->print("%-20s", "garbage-first heap");
st->print(" total reserved %zuK, committed %zuK, used %zuK",
_hrm.reserved().byte_size()/K, capacity()/K, heap_used/K);

View File

@ -611,23 +611,24 @@ void G1RemSet::scan_collection_set_code_roots(G1ParScanThreadState* pss,
G1GCPhaseTimes::GCParPhases coderoots_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase) {
EventGCPhaseParallel event;
Tickspan code_root_scan_time;
Tickspan code_root_trim_partially_time;
G1EvacPhaseWithTrimTimeTracker timer(pss, code_root_scan_time, code_root_trim_partially_time);
G1GCPhaseTimes* p = _g1h->phase_times();
{
G1EvacPhaseWithTrimTimeTracker timer(pss, code_root_scan_time, code_root_trim_partially_time);
G1ScanCodeRootsClosure cl(_scan_state, pss, worker_id);
// Code roots work distribution occurs inside the iteration method. So scan all collection
// set regions for all threads.
_g1h->collection_set_iterate_increment_from(&cl, worker_id);
G1ScanCodeRootsClosure cl(_scan_state, pss, worker_id);
// Code roots work distribution occurs inside the iteration method. So scan all collection
// set regions for all threads.
_g1h->collection_set_iterate_increment_from(&cl, worker_id);
p->record_or_add_thread_work_item(coderoots_phase, worker_id, cl.code_roots_scanned(), G1GCPhaseTimes::CodeRootsScannedNMethods);
}
p->record_or_add_time_secs(coderoots_phase, worker_id, code_root_scan_time.seconds());
p->add_time_secs(objcopy_phase, worker_id, code_root_trim_partially_time.seconds());
p->record_or_add_thread_work_item(coderoots_phase, worker_id, cl.code_roots_scanned(), G1GCPhaseTimes::CodeRootsScannedNMethods);
event.commit(GCId::current(), worker_id, G1GCPhaseTimes::phase_name(coderoots_phase));
}
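The reshuffle above matters because the trim-time tracker only has valid totals once its destructor has run, so the recording calls now sit outside the inner scope. A small illustrative RAII-timer sketch of the same shape (standard C++, not the G1 tracker API):

#include <chrono>
#include <cstdio>

// The elapsed time is only written in the destructor, so it must be
// read after the inner scope has closed.
struct ScopeTimer {
  double& out_seconds;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
  explicit ScopeTimer(double& out) : out_seconds(out) {}
  ~ScopeTimer() {
    out_seconds = std::chrono::duration<double>(
        std::chrono::steady_clock::now() - start).count();
  }
};

int main() {
  double scan_seconds = 0.0;
  {
    ScopeTimer timer(scan_seconds);  // starts timing
    // ... scanning work would happen here ...
  }                                  // destructor runs; scan_seconds is now valid
  std::printf("scan took %f s\n", scan_seconds);
  return 0;
}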

View File

@ -37,8 +37,45 @@
#include "utilities/defaultStream.hpp"
#include "utilities/powerOfTwo.hpp"
size_t ParallelArguments::conservative_max_heap_alignment() {
return compute_heap_alignment();
static size_t num_young_spaces() {
// When using NUMA, we create one MutableNUMASpace for each NUMA node
const size_t num_eden_spaces = UseNUMA ? os::numa_get_groups_num() : 1;
// The young generation must have room for eden + two survivors
return num_eden_spaces + 2;
}
static size_t num_old_spaces() {
return 1;
}
void ParallelArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
const size_t card_table_alignment = CardTable::ct_max_alignment_constraint();
SpaceAlignment = ParallelScavengeHeap::default_space_alignment();
if (UseLargePages) {
const size_t total_spaces = num_young_spaces() + num_old_spaces();
const size_t page_size = os::page_size_for_region_unaligned(MaxHeapSize, total_spaces);
ParallelScavengeHeap::set_desired_page_size(page_size);
if (page_size == os::vm_page_size()) {
log_warning(gc, heap)("MaxHeapSize (%zu) must be large enough for %zu * page-size; Disabling UseLargePages for heap",
MaxHeapSize, total_spaces);
}
if (page_size > SpaceAlignment) {
SpaceAlignment = page_size;
}
HeapAlignment = lcm(page_size, card_table_alignment);
} else {
assert(is_aligned(SpaceAlignment, os::vm_page_size()), "");
ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
HeapAlignment = card_table_alignment;
}
}
void ParallelArguments::initialize() {
@ -98,49 +135,36 @@ void ParallelArguments::initialize() {
FullGCForwarding::initialize_flags(heap_reserved_size_bytes());
}
void ParallelArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = ParallelScavengeHeap::default_space_alignment();
HeapAlignment = compute_heap_alignment();
}
size_t ParallelArguments::conservative_max_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes of heap correspond to a
// 1-byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.
void ParallelArguments::initialize_heap_flags_and_sizes_one_pass() {
// Do basic sizing work
GenArguments::initialize_heap_flags_and_sizes();
}
size_t alignment = CardTable::ct_max_alignment_constraint();
void ParallelArguments::initialize_heap_flags_and_sizes() {
initialize_heap_flags_and_sizes_one_pass();
if (!UseLargePages) {
ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
return;
if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}
// If using large-page, need to update SpaceAlignment so that spaces are page-size aligned.
const size_t min_pages = 4; // 1 for eden + 1 for each survivor + 1 for old
const size_t page_sz = os::page_size_for_region_aligned(MinHeapSize, min_pages);
ParallelScavengeHeap::set_desired_page_size(page_sz);
if (page_sz == os::vm_page_size()) {
log_warning(gc, heap)("MinHeapSize (%zu) must be large enough for 4 * page-size; Disabling UseLargePages for heap", MinHeapSize);
return;
}
// Space is largepage-aligned.
size_t new_alignment = page_sz;
if (new_alignment != SpaceAlignment) {
SpaceAlignment = new_alignment;
// Redo everything from the start
initialize_heap_flags_and_sizes_one_pass();
}
}
size_t ParallelArguments::heap_reserved_size_bytes() {
return MaxHeapSize;
return alignment;
}
CollectedHeap* ParallelArguments::create_heap() {
return new ParallelScavengeHeap();
}
size_t ParallelArguments::young_gen_size_lower_bound() {
return num_young_spaces() * SpaceAlignment;
}
size_t ParallelArguments::old_gen_size_lower_bound() {
return num_old_spaces() * SpaceAlignment;
}
size_t ParallelArguments::heap_reserved_size_bytes() {
return MaxHeapSize;
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,21 +26,16 @@
#ifndef SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP
#define SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP
#include "gc/shared/gcArguments.hpp"
#include "gc/shared/genArguments.hpp"
class CollectedHeap;
class ParallelArguments : public GenArguments {
private:
virtual void initialize_alignments();
virtual void initialize_heap_flags_and_sizes();
void initialize_heap_flags_and_sizes_one_pass();
virtual void initialize();
virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
virtual size_t young_gen_size_lower_bound();
virtual size_t old_gen_size_lower_bound();
public:
static size_t heap_reserved_size_bytes();

View File

@ -307,9 +307,13 @@ HeapWord* ParallelScavengeHeap::mem_allocate_cas_noexpand(size_t size, bool is_t
HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
for (uint loop_count = 0; /* empty */; ++loop_count) {
HeapWord* result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
return result;
HeapWord* result;
{
ConditionalMutexLocker locker(Heap_lock, !is_init_completed());
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
return result;
}
}
// Read total_collections() under the lock so that multiple
@ -326,10 +330,15 @@ HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
}
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
// Double-checked locking; this ensures that is_init_completed() does not

// transition while expanding the heap.
MonitorLocker ml(InitCompleted_lock, Monitor::_no_safepoint_check_flag);
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
}
}
}
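The added MonitorLocker is classic double-checked locking: a cheap unlocked check, then a re-check under the lock so is_init_completed() cannot flip while the expansion runs. A minimal sketch of the idiom with std::atomic and std::mutex (names are illustrative):

#include <atomic>
#include <mutex>

std::atomic<bool> init_completed{false};
std::mutex init_lock;

bool run_pre_init_expansion() {
  if (!init_completed.load(std::memory_order_acquire)) {    // first check, no lock
    std::lock_guard<std::mutex> guard(init_lock);
    if (!init_completed.load(std::memory_order_relaxed)) {  // re-check under the lock
      // Safe to run the initialization-only path here.
      return true;
    }
  }
  return false;
}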

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,11 +28,49 @@
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"
static size_t compute_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes of heap correspond to a
// 1-byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.
size_t alignment = CardTable::ct_max_alignment_constraint();
if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}
return alignment;
}
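The comment's arithmetic can be made concrete: with 512 bytes of heap per card-table byte and a 4 KiB OS page, one full page of card-table entries covers 2 MiB of heap, and large pages raise the result to an lcm of the two alignments. A small illustrative calculation (all values are assumptions, not HotSpot constants):

#include <cstddef>
#include <cstdio>
#include <numeric>  // std::lcm

int main() {
  const size_t bytes_per_card_entry = 512;
  const size_t os_page_size = 4096;
  const size_t card_alignment = bytes_per_card_entry * os_page_size;  // 2 MiB
  const size_t large_page_size = 2 * 1024 * 1024;                     // assumed
  const size_t heap_alignment = std::lcm(card_alignment, large_page_size);
  std::printf("card alignment %zu, heap alignment %zu\n",
              card_alignment, heap_alignment);
  return 0;
}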
void SerialArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = (size_t)Generation::GenGrain;
HeapAlignment = compute_heap_alignment();
}
void SerialArguments::initialize() {
GCArguments::initialize();
FullGCForwarding::initialize_flags(MaxHeapSize);
}
size_t SerialArguments::conservative_max_heap_alignment() {
return MAX2((size_t)Generation::GenGrain, compute_heap_alignment());
}
CollectedHeap* SerialArguments::create_heap() {
return new SerialHeap();
}
size_t SerialArguments::young_gen_size_lower_bound() {
// The young generation must be aligned and have room for eden + two survivors
return 3 * SpaceAlignment;
}
size_t SerialArguments::old_gen_size_lower_bound() {
return SpaceAlignment;
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,12 +28,14 @@
#include "gc/shared/genArguments.hpp"
class CollectedHeap;
class SerialArguments : public GenArguments {
private:
virtual void initialize_alignments();
virtual void initialize();
virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
virtual size_t young_gen_size_lower_bound();
virtual size_t old_gen_size_lower_bound();
};
#endif // SHARE_GC_SERIAL_SERIALARGUMENTS_HPP

View File

@ -304,9 +304,12 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
HeapWord* result = nullptr;
for (uint try_count = 1; /* break */; try_count++) {
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
break;
{
ConditionalMutexLocker locker(Heap_lock, !is_init_completed());
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
break;
}
}
uint gc_count_before; // Read inside the Heap_lock locked region.
{
@ -320,10 +323,15 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
}
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
// Double-checked locking; this ensures that is_init_completed() does not
// transition while expanding the heap.
MonitorLocker ml(InitCompleted_lock, Monitor::_no_safepoint_check_flag);
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
}
}
}

View File

@ -27,6 +27,7 @@
#include "cppstdlib/limits.hpp"
#include "gc/shared/freeListAllocator.hpp"
#include "runtime/atomic.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@ -38,7 +39,7 @@ class BufferNode {
InternalSizeType _index;
InternalSizeType _capacity;
BufferNode* volatile _next;
Atomic<BufferNode*> _next;
void* _buffer[1]; // Pseudo flexible array member.
BufferNode(InternalSizeType capacity)
@ -58,11 +59,11 @@ public:
return std::numeric_limits<InternalSizeType>::max();
}
static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
static Atomic<BufferNode*>* next_ptr(BufferNode& bn) { return &bn._next; }
typedef LockFreeStack<BufferNode, &next_ptr> Stack;
BufferNode* next() const { return _next; }
void set_next(BufferNode* n) { _next = n; }
BufferNode* next() const { return _next.load_relaxed(); }
void set_next(BufferNode* n) { _next.store_relaxed(n); }
size_t index() const { return _index; }
void set_index(size_t i) {

View File

@ -62,24 +62,6 @@ void GCArguments::initialize_heap_sizes() {
initialize_size_info();
}
size_t GCArguments::compute_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes of heap correspond to a
// 1-byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.
size_t alignment = CardTable::ct_max_alignment_constraint();
if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}
return alignment;
}
#ifdef ASSERT
void GCArguments::assert_flags() {
assert(InitialHeapSize <= MaxHeapSize, "Ergonomics decided on incompatible initial and maximum heap sizes");

View File

@ -45,6 +45,8 @@ protected:
public:
virtual void initialize();
// Return the (conservative) maximum heap alignment
virtual size_t conservative_max_heap_alignment() = 0;
// Used by heap size heuristics to determine max
@ -59,8 +61,6 @@ public:
}
void initialize_heap_sizes();
static size_t compute_heap_alignment();
};
#endif // SHARE_GC_SHARED_GCARGUMENTS_HPP

View File

@ -25,6 +25,7 @@
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/thread.hpp"
#include "utilities/ostream.hpp"
stringStream* GCLogPrecious::_lines = nullptr;
@ -83,7 +84,8 @@ void GCLogPrecious::print_on_error(outputStream* st) {
return;
}
if (!_lock->try_lock_without_rank_check()) {
if (Thread::current_or_null_safe() == nullptr ||
!_lock->try_lock_without_rank_check()) {
st->print_cr("<Skipped>\n");
return;
}

View File

@ -291,7 +291,7 @@
"size on systems with small physical memory size") \
range(0.0, 100.0) \
\
product(double, InitialRAMPercentage, 0.2, \
product(double, InitialRAMPercentage, 0.0, \
"Percentage of real memory used for initial heap size") \
range(0.0, 100.0) \
\

View File

@ -42,17 +42,6 @@ size_t MaxOldSize = 0;
// See more in JDK-8346005
size_t OldSize = ScaleForWordSize(4*M);
size_t GenArguments::conservative_max_heap_alignment() { return (size_t)Generation::GenGrain; }
static size_t young_gen_size_lower_bound() {
// The young generation must be aligned and have room for eden + two survivors
return 3 * SpaceAlignment;
}
static size_t old_gen_size_lower_bound() {
return SpaceAlignment;
}
size_t GenArguments::scale_by_NewRatio_aligned(size_t base_size, size_t alignment) {
return align_down_bounded(base_size / (NewRatio + 1), alignment);
}
@ -64,13 +53,6 @@ static size_t bound_minus_alignment(size_t desired_size,
return MIN2(desired_size, max_minus);
}
void GenArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = (size_t)Generation::GenGrain;
HeapAlignment = compute_heap_alignment();
}
void GenArguments::initialize_heap_flags_and_sizes() {
GCArguments::initialize_heap_flags_and_sizes();

View File

@ -38,17 +38,16 @@ extern size_t OldSize;
class GenArguments : public GCArguments {
friend class TestGenCollectorPolicy; // Testing
private:
virtual void initialize_alignments();
virtual void initialize_size_info();
// Return the (conservative) maximum heap alignment
virtual size_t conservative_max_heap_alignment();
DEBUG_ONLY(void assert_flags();)
DEBUG_ONLY(void assert_size_info();)
static size_t scale_by_NewRatio_aligned(size_t base_size, size_t alignment);
virtual size_t young_gen_size_lower_bound() = 0;
virtual size_t old_gen_size_lower_bound() = 0;
protected:
virtual void initialize_heap_flags_and_sizes();
};

View File

@ -250,7 +250,7 @@ static JVMFlag::Error MaxSizeForHeapAlignment(const char* name, size_t value, bo
} else
#endif
{
heap_alignment = GCArguments::compute_heap_alignment();
heap_alignment = Arguments::conservative_max_heap_alignment();
}
return MaxSizeForAlignment(name, value, heap_alignment, verbose);
@ -285,7 +285,7 @@ JVMFlag::Error SoftMaxHeapSizeConstraintFunc(size_t value, bool verbose) {
JVMFlag::Error HeapBaseMinAddressConstraintFunc(size_t value, bool verbose) {
// If an overflow happened in Arguments::set_heap_size(), MaxHeapSize will have too large a value.
// Check for this by ensuring that MaxHeapSize plus the requested min base address still fit within max_uintx.
if (UseCompressedOops && FLAG_IS_ERGO(MaxHeapSize) && (value > (max_uintx - MaxHeapSize))) {
if (value > (max_uintx - MaxHeapSize)) {
JVMFlag::printError(verbose,
"HeapBaseMinAddress (%zu) or MaxHeapSize (%zu) is too large. "
"Sum of them must be less than or equal to maximum of size_t (%zu)\n",

View File

@ -27,7 +27,6 @@
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomicAccess.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
@ -85,28 +84,28 @@ SATBMarkQueueSet::~SATBMarkQueueSet() {
// remains set until the count is reduced to zero.
// Increment count. If count > threshold, set flag, else maintain flag.
static void increment_count(volatile size_t* cfptr, size_t threshold) {
static void increment_count(Atomic<size_t>* cfptr, size_t threshold) {
size_t old;
size_t value = AtomicAccess::load(cfptr);
size_t value = cfptr->load_relaxed();
do {
old = value;
value += 2;
assert(value > old, "overflow");
if (value > threshold) value |= 1;
value = AtomicAccess::cmpxchg(cfptr, old, value);
value = cfptr->compare_exchange(old, value);
} while (value != old);
}
// Decrement count. If count == 0, clear flag, else maintain flag.
static void decrement_count(volatile size_t* cfptr) {
static void decrement_count(Atomic<size_t>* cfptr) {
size_t old;
size_t value = AtomicAccess::load(cfptr);
size_t value = cfptr->load_relaxed();
do {
assert((value >> 1) != 0, "underflow");
old = value;
value -= 2;
if (value <= 1) value = 0;
value = AtomicAccess::cmpxchg(cfptr, old, value);
value = cfptr->compare_exchange(old, value);
} while (value != old);
}
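Both helpers above pack the buffer count and the process flag into one word: the count occupies the upper bits (hence the steps of 2) and bit 0 is the flag. A hedged stand-alone sketch of the same encoding with std::atomic (names and the threshold convention are illustrative):

#include <atomic>
#include <cstddef>

std::atomic<std::size_t> count_and_flag{0};

void on_buffer_added(std::size_t encoded_threshold) {
  std::size_t old = count_and_flag.load(std::memory_order_relaxed);
  std::size_t desired;
  do {
    desired = old + 2;                  // bump the count
    if (desired > encoded_threshold) {
      desired |= 1;                     // over threshold: set the flag bit
    }
  } while (!count_and_flag.compare_exchange_weak(old, desired,
                                                 std::memory_order_relaxed));
}

std::size_t buffer_count()   { return count_and_flag.load() >> 1; }
bool        should_process() { return (count_and_flag.load() & 1) != 0; }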
@ -332,7 +331,7 @@ void SATBMarkQueueSet::print_all(const char* msg) {
#endif // PRODUCT
void SATBMarkQueueSet::abandon_completed_buffers() {
AtomicAccess::store(&_count_and_process_flag, size_t(0));
_count_and_process_flag.store_relaxed(0u);
BufferNode* buffers_to_delete = _list.pop_all();
while (buffers_to_delete != nullptr) {
BufferNode* bn = buffers_to_delete;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -29,6 +29,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "oops/oopsHierarchy.hpp"
#include "runtime/atomic.hpp"
class Thread;
class Monitor;
@ -87,7 +88,7 @@ class SATBMarkQueueSet: public PtrQueueSet {
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
PaddedEnd<BufferNode::Stack> _list;
volatile size_t _count_and_process_flag;
Atomic<size_t> _count_and_process_flag;
// These are rarely (if ever) changed, so same cache line as count.
size_t _process_completed_buffers_threshold;
size_t _buffer_enqueue_threshold;
@ -148,12 +149,12 @@ public:
// The number of buffers in the list. Racy and not updated atomically
// with the set of completed buffers.
size_t completed_buffers_num() const {
return _count_and_process_flag >> 1;
return _count_and_process_flag.load_relaxed() >> 1;
}
// Return true if completed buffers should be processed.
bool process_completed_buffers() const {
return (_count_and_process_flag & 1) != 0;
return (_count_and_process_flag.load_relaxed() & 1) != 0;
}
#ifndef PRODUCT

View File

@ -37,6 +37,7 @@
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/powerOfTwo.hpp"
void ShenandoahArguments::initialize() {
#if !(defined AARCH64 || defined AMD64 || defined PPC64 || defined RISCV64)
@ -205,7 +206,7 @@ void ShenandoahArguments::initialize() {
}
size_t ShenandoahArguments::conservative_max_heap_alignment() {
size_t align = ShenandoahMaxRegionSize;
size_t align = next_power_of_2(ShenandoahMaxRegionSize);
if (UseLargePages) {
align = MAX2(align, os::large_page_size());
}
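next_power_of_2 rounds the maximum region size so the alignment stays a power of two before being combined with the large page size. A common bit-smearing way to compute such a round-up, shown only as an illustration (this is not the powerOfTwo.hpp implementation, whose exact semantics may differ):

#include <cstdint>

// Smear the high bit to the right, then add one, yielding the smallest
// power of two not below the input.
inline uint64_t round_up_power_of_2_sketch(uint64_t v) {
  if (v <= 1) {
    return 1;
  }
  v -= 1;
  v |= v >> 1;  v |= v >> 2;  v |= v >> 4;
  v |= v >> 8;  v |= v >> 16; v |= v >> 32;
  return v + 1;
}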

View File

@ -208,13 +208,13 @@ oop ShenandoahGenerationalHeap::evacuate_object(oop p, Thread* thread) {
assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope");
ShenandoahHeapRegion* r = heap_region_containing(p);
assert(!r->is_humongous(), "never evacuate humongous objects");
ShenandoahHeapRegion* from_region = heap_region_containing(p);
assert(!from_region->is_humongous(), "never evacuate humongous objects");
ShenandoahAffiliation target_gen = r->affiliation();
// gc_generation() can change asynchronously and should not be used here.
assert(active_generation() != nullptr, "Error");
if (active_generation()->is_young() && target_gen == YOUNG_GENERATION) {
// Try to keep the object in the same generation
const ShenandoahAffiliation target_gen = from_region->affiliation();
if (target_gen == YOUNG_GENERATION) {
markWord mark = p->mark();
if (mark.is_marked()) {
// Already forwarded.
@ -224,26 +224,31 @@ oop ShenandoahGenerationalHeap::evacuate_object(oop p, Thread* thread) {
if (mark.has_displaced_mark_helper()) {
// We don't want to deal with MT here just to ensure we read the right mark word.
// Skip the potential promotion attempt for this one.
} else if (age_census()->is_tenurable(r->age() + mark.age())) {
oop result = try_evacuate_object(p, thread, r, OLD_GENERATION);
} else if (age_census()->is_tenurable(from_region->age() + mark.age())) {
// If the object is tenurable, try to promote it
oop result = try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(p, thread, from_region->age());
// If we failed to promote this aged object, we'll fall through to code below and evacuate to young-gen.
if (result != nullptr) {
return result;
}
// If we failed to promote this aged object, we'll fall through to code below and evacuate to young-gen.
}
return try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(p, thread, from_region->age());
}
return try_evacuate_object(p, thread, r, target_gen);
assert(target_gen == OLD_GENERATION, "Expected evacuation to old");
return try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(p, thread, from_region->age());
}
// try_evacuate_object registers the object and dirties the associated remembered set information when evacuating
// to OLD_GENERATION.
oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region,
ShenandoahAffiliation target_gen) {
template<ShenandoahAffiliation FROM_GENERATION, ShenandoahAffiliation TO_GENERATION>
oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, uint from_region_age) {
bool alloc_from_lab = true;
bool has_plab = false;
HeapWord* copy = nullptr;
size_t size = ShenandoahForwarding::size(p);
bool is_promotion = (target_gen == OLD_GENERATION) && from_region->is_young();
constexpr bool is_promotion = (TO_GENERATION == OLD_GENERATION) && (FROM_GENERATION == YOUNG_GENERATION);
#ifdef ASSERT
if (ShenandoahOOMDuringEvacALot &&
@ -252,7 +257,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
} else {
#endif
if (UseTLAB) {
switch (target_gen) {
switch (TO_GENERATION) {
case YOUNG_GENERATION: {
copy = allocate_from_gclab(thread, size);
if ((copy == nullptr) && (size < ShenandoahThreadLocalData::gclab_size(thread))) {
@ -300,7 +305,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
if (copy == nullptr) {
// If we failed to allocate in LAB, we'll try a shared allocation.
if (!is_promotion || !has_plab || (size > PLAB::min_size())) {
ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, target_gen, is_promotion);
ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, TO_GENERATION, is_promotion);
copy = allocate_memory(req);
alloc_from_lab = false;
}
@ -314,8 +319,8 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
#endif
if (copy == nullptr) {
if (target_gen == OLD_GENERATION) {
if (from_region->is_young()) {
if (TO_GENERATION == OLD_GENERATION) {
if (FROM_GENERATION == YOUNG_GENERATION) {
// Signal that promotion failed. Will evacuate this old object somewhere in young gen.
old_generation()->handle_failed_promotion(thread, size);
return nullptr;
@ -327,14 +332,12 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
}
control_thread()->handle_alloc_failure_evac(size);
oom_evac_handler()->handle_out_of_memory_during_evacuation();
return ShenandoahBarrierSet::resolve_forwarded(p);
}
if (ShenandoahEvacTracking) {
evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
evac_tracker()->begin_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
}
// Copy the object:
@ -342,8 +345,8 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
oop copy_val = cast_to_oop(copy);
// Update the age of the evacuated object
if (target_gen == YOUNG_GENERATION && is_aging_cycle()) {
ShenandoahHeap::increase_object_age(copy_val, from_region->age() + 1);
if (TO_GENERATION == YOUNG_GENERATION && is_aging_cycle()) {
increase_object_age(copy_val, from_region_age + 1);
}
// Try to install the new forwarding pointer.
@ -360,18 +363,12 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
if (ShenandoahEvacTracking) {
// Record that the evacuation succeeded
evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
evac_tracker()->end_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
}
if (target_gen == OLD_GENERATION) {
old_generation()->handle_evacuation(copy, size, from_region->is_young());
} else {
// When copying to the old generation above, we don't care
// about recording object age in the census stats.
assert(target_gen == YOUNG_GENERATION, "Error");
if (TO_GENERATION == OLD_GENERATION) {
old_generation()->handle_evacuation(copy, size);
}
shenandoah_assert_correct(nullptr, copy_val);
return copy_val;
} else {
// Failed to evacuate. We need to deal with the object that is left behind. Since this
// new allocation is certainly after TAMS, it will be considered live in the next cycle.
@ -382,7 +379,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
// For LAB allocations, it is enough to rollback the allocation ptr. Either the next
// object will overwrite this stale copy, or the filler object on LAB retirement will
// do this.
switch (target_gen) {
switch (TO_GENERATION) {
case YOUNG_GENERATION: {
ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
break;
@ -405,14 +402,16 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
// we have to keep the fwdptr initialized and pointing to our (stale) copy.
assert(size >= ShenandoahHeap::min_fill_size(), "previously allocated object known to be larger than min_size");
fill_with_object(copy, size);
shenandoah_assert_correct(nullptr, copy_val);
// For non-LAB allocations, the object has already been registered
}
shenandoah_assert_correct(nullptr, result);
return result;
}
shenandoah_assert_correct(nullptr, result);
return result;
}
template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(oop p, Thread* thread, uint from_region_age);
template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);
template oop ShenandoahGenerationalHeap::try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);
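Templating on the source and target generation turns is_promotion into a compile-time constant, so each of the three instantiations above keeps only its own branch. A toy sketch of that pattern (the enum and function names are invented):

#include <cstdio>

enum Generation { YOUNG, OLD };

template <Generation FROM, Generation TO>
void evacuate() {
  constexpr bool is_promotion = (FROM == YOUNG) && (TO == OLD);
  if constexpr (is_promotion) {
    std::puts("promotion path");
  } else {
    std::puts("same-generation path");
  }
}

// Mirroring the three supported combinations above.
template void evacuate<YOUNG, YOUNG>();
template void evacuate<YOUNG, OLD>();
template void evacuate<OLD, OLD>();

int main() {
  evacuate<YOUNG, OLD>();
  return 0;
}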
inline HeapWord* ShenandoahGenerationalHeap::allocate_from_plab(Thread* thread, size_t size, bool is_promotion) {
assert(UseTLAB, "TLABs should be enabled");

View File

@ -87,7 +87,9 @@ public:
void update_region_ages(ShenandoahMarkingContext* ctx);
oop evacuate_object(oop p, Thread* thread) override;
oop try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region, ShenandoahAffiliation target_gen);
template<ShenandoahAffiliation FROM_REGION, ShenandoahAffiliation TO_REGION>
oop try_evacuate_object(oop p, Thread* thread, uint from_region_age);
// In the generational mode, we will use these two functions for young, mixed, and global collections.
// For young and mixed, the generation argument will be the young generation, otherwise it will be the global generation.

View File

@ -34,4 +34,5 @@ inline bool ShenandoahGenerationalHeap::is_tenurable(const ShenandoahHeapRegion*
return _age_census->is_tenurable(r->age());
}
#endif // SHARE_GC_SHENANDOAH_SHENANDOAHGENERATIONALHEAP_INLINE_HPP

View File

@ -1015,7 +1015,7 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
// Record the plab configuration for this result and register the object.
if (result != nullptr && req.is_old()) {
old_generation()->configure_plab_for_current_thread(req);
if (req.type() == ShenandoahAllocRequest::_alloc_shared_gc) {
if (!req.is_lab_alloc()) {
// Register the newly allocated object while we're holding the global lock since there's no synchronization
// built in to the implementation of register_object(). There are potential races when multiple independent
// threads are allocating objects, some of which might span the same card region. For example, consider

View File

@ -619,7 +619,7 @@ void ShenandoahOldGeneration::log_failed_promotion(LogStream& ls, Thread* thread
}
}
void ShenandoahOldGeneration::handle_evacuation(HeapWord* obj, size_t words, bool promotion) {
void ShenandoahOldGeneration::handle_evacuation(HeapWord* obj, size_t words) const {
// Only register the copy of the object that won the evacuation race.
_card_scan->register_object_without_lock(obj);

View File

@ -179,7 +179,7 @@ public:
void log_failed_promotion(LogStream& ls, Thread* thread, size_t size) const;
// A successful evacuation re-dirties the cards and registers the object with the remembered set
void handle_evacuation(HeapWord* obj, size_t words, bool promotion);
void handle_evacuation(HeapWord* obj, size_t words) const;
// Clear the flag after it is consumed by the control thread
bool clear_failed_evacuation() {

View File

@ -1087,7 +1087,6 @@ private:
ZRelocateSmallAllocator _small_allocator;
ZRelocateMediumAllocator _medium_allocator;
const size_t _total_forwardings;
volatile size_t _numa_local_forwardings;
public:
ZRelocateTask(ZRelocationSet* relocation_set,
@ -1104,8 +1103,7 @@ public:
_medium_targets(medium_targets),
_small_allocator(_generation),
_medium_allocator(_generation, shared_medium_targets),
_total_forwardings(relocation_set->nforwardings()),
_numa_local_forwardings(0) {
_total_forwardings(relocation_set->nforwardings()) {
for (uint32_t i = 0; i < ZNUMA::count(); i++) {
ZRelocationSetParallelIterator* const iter = _iters->addr(i);
@ -1124,18 +1122,17 @@ public:
// Signal that we're not using the queue anymore. Used mostly for asserts.
_queue->deactivate();
if (ZNUMA::is_enabled()) {
log_debug(gc, reloc, numa)("Forwardings relocated NUMA-locally: %zu / %zu (%.0f%%)",
_numa_local_forwardings, _total_forwardings, percent_of(_numa_local_forwardings, _total_forwardings));
}
}
virtual void work() {
ZRelocateWork<ZRelocateSmallAllocator> small(&_small_allocator, _small_targets->addr(), _generation);
ZRelocateWork<ZRelocateMediumAllocator> medium(&_medium_allocator, _medium_targets->addr(), _generation);
const uint32_t num_nodes = ZNUMA::count();
uint32_t numa_local_forwardings_worker = 0;
const uint32_t start_node = ZNUMA::id();
uint32_t current_node = start_node;
bool has_affinity = false;
bool has_affinity_current_node = false;
const auto do_forwarding = [&](ZForwarding* forwarding) {
ZPage* const page = forwarding->page();
@ -1167,26 +1164,30 @@ public:
const auto do_forwarding_one_from_iter = [&]() {
ZForwarding* forwarding;
const uint32_t start_node = ZNUMA::id();
uint32_t current_node = start_node;
for (uint32_t i = 0; i < num_nodes; i++) {
for (;;) {
if (_iters->get(current_node).next_if(&forwarding, check_numa_local, current_node)) {
claim_and_do_forwarding(forwarding);
if (current_node == start_node) {
// Track if this forwarding was relocated on the local NUMA node
numa_local_forwardings_worker++;
// Set thread affinity for NUMA-local processing (if needed)
if (UseNUMA && !has_affinity_current_node) {
os::numa_set_thread_affinity(Thread::current(), ZNUMA::numa_id_to_node(current_node));
has_affinity = true;
has_affinity_current_node = true;
}
// Perform the forwarding task
claim_and_do_forwarding(forwarding);
return true;
}
// Check next node.
// No work found on the current node, move to the next node
current_node = (current_node + 1) % num_nodes;
}
has_affinity_current_node = false;
return false;
// If we've looped back to the starting node there's no more work to do
if (current_node == start_node) {
return false;
}
}
};
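The lambda above searches for work starting at the local NUMA node, walks the remaining nodes round-robin when nothing is found, and gives up once it wraps back to the start. A compact illustrative sketch of that loop shape (the claim callback is a made-up stand-in):

#include <cstdint>

bool claim_one(uint32_t num_nodes, uint32_t start_node,
               bool (*try_claim_on_node)(uint32_t node)) {
  uint32_t current = start_node;
  for (;;) {
    if (try_claim_on_node(current)) {
      return true;                        // found and claimed work on this node
    }
    current = (current + 1) % num_nodes;  // no work here: try the next node
    if (current == start_node) {
      return false;                       // wrapped around: nothing left anywhere
    }
  }
}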
for (;;) {
@ -1209,11 +1210,13 @@ public:
}
}
if (ZNUMA::is_enabled()) {
AtomicAccess::add(&_numa_local_forwardings, numa_local_forwardings_worker, memory_order_relaxed);
}
_queue->leave();
if (UseNUMA && has_affinity) {
// Restore the affinity of the thread so that it isn't bound to a specific
// node any more
os::numa_set_thread_affinity(Thread::current(), -1);
}
}
virtual void resize_workers(uint nworkers) {

View File

@ -130,7 +130,10 @@ static void z_verify_root_oop_object(zaddress addr, void* p) {
static void z_verify_old_oop(zpointer* p) {
const zpointer o = *p;
assert(o != zpointer::null, "Old should not contain raw null");
if (o == zpointer::null) {
guarantee(ZGeneration::young()->is_phase_mark_complete(), "Only possible when flip promoting");
guarantee(ZHeap::heap()->page(p)->is_allocating(), "Raw nulls only possible in allocating pages");
}
if (!z_is_null_relaxed(o)) {
if (ZPointer::is_mark_good(o)) {
// Even though the pointer is mark good, we can't verify that it should

View File

@ -217,7 +217,8 @@ static bool compute_top_frame(const JfrSampleRequest& request, frame& top_frame,
const PcDesc* const pc_desc = get_pc_desc(sampled_nm, sampled_pc);
if (is_valid(pc_desc)) {
intptr_t* const synthetic_sp = sender_sp - sampled_nm->frame_size();
top_frame = frame(synthetic_sp, synthetic_sp, sender_sp - 2, pc_desc->real_pc(sampled_nm), sampled_nm);
intptr_t* const synthetic_fp = sender_sp AARCH64_ONLY( - frame::sender_sp_offset);
top_frame = frame(synthetic_sp, synthetic_sp, synthetic_fp, pc_desc->real_pc(sampled_nm), sampled_nm);
in_continuation = is_in_continuation(top_frame, jt);
return true;
}

View File

@ -437,7 +437,7 @@ ReservedSpace HeapReserver::Instance::try_reserve_range(char *highest_start,
if (reserved.is_reserved()) {
if (reserved.base() >= aligned_heap_base_min_address &&
size <= (uintptr_t)(upper_bound - reserved.base())) {
size <= (size_t)(upper_bound - reserved.base())) {
// Got a successful reservation.
return reserved;
}
@ -546,16 +546,16 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
const size_t attach_point_alignment = lcm(alignment, os_attach_point_alignment);
char* aligned_heap_base_min_address = align_up((char*)HeapBaseMinAddress, alignment);
size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > (char*)OopEncodingHeapMax) ?
uintptr_t aligned_heap_base_min_address = align_up(MAX2(HeapBaseMinAddress, alignment), alignment);
size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > OopEncodingHeapMax) ?
noaccess_prefix_size : 0;
ReservedSpace reserved{};
// Attempt to alloc at user-given address.
if (!FLAG_IS_DEFAULT(HeapBaseMinAddress)) {
reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, aligned_heap_base_min_address);
if (reserved.base() != aligned_heap_base_min_address) { // Enforce this exact address.
reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, (char*)aligned_heap_base_min_address);
if (reserved.base() != (char*)aligned_heap_base_min_address) { // Enforce this exact address.
release(reserved);
reserved = {};
}
@ -575,38 +575,41 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
// Attempt to allocate so that we can run without base and scale (32-Bit unscaled compressed oops).
// Give it several tries from top of range to bottom.
if (aligned_heap_base_min_address + size <= (char *)UnscaledOopHeapMax) {
if (aligned_heap_base_min_address + size <= UnscaledOopHeapMax) {
// Calc address range within we try to attach (range of possible start addresses).
char* const highest_start = align_down((char *)UnscaledOopHeapMax - size, attach_point_alignment);
char* const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
aligned_heap_base_min_address, (char *)UnscaledOopHeapMax, size, alignment, page_size);
uintptr_t const highest_start = align_down(UnscaledOopHeapMax - size, attach_point_alignment);
uintptr_t const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT ,
lowest_start, highest_start);
reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
(char*)aligned_heap_base_min_address, (char*)UnscaledOopHeapMax, size, alignment, page_size);
}
// zerobased: Attempt to allocate in the lower 32G.
char *zerobased_max = (char *)OopEncodingHeapMax;
const uintptr_t zerobased_max = OopEncodingHeapMax;
// Give it several tries from top of range to bottom.
if (aligned_heap_base_min_address + size <= zerobased_max && // Zerobased theoretical possible.
((!reserved.is_reserved()) || // No previous try succeeded.
(reserved.end() > zerobased_max))) { // Unscaled delivered an arbitrary address.
(reserved.end() > (char*)zerobased_max))) { // Unscaled delivered an arbitrary address.
// Release previous reservation
release(reserved);
// Calc address range within we try to attach (range of possible start addresses).
char *const highest_start = align_down(zerobased_max - size, attach_point_alignment);
uintptr_t const highest_start = align_down(zerobased_max - size, attach_point_alignment);
// Need to be careful about size being guaranteed to be less
// than UnscaledOopHeapMax due to type constraints.
char *lowest_start = aligned_heap_base_min_address;
uint64_t unscaled_end = UnscaledOopHeapMax - size;
if (unscaled_end < UnscaledOopHeapMax) { // unscaled_end wrapped if size is large
lowest_start = MAX2(lowest_start, (char*)unscaled_end);
uintptr_t lowest_start = aligned_heap_base_min_address;
if (size < UnscaledOopHeapMax) {
lowest_start = MAX2<uintptr_t>(lowest_start, UnscaledOopHeapMax - size);
}
lowest_start = align_up(lowest_start, attach_point_alignment);
reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
aligned_heap_base_min_address, zerobased_max, size, alignment, page_size);
assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT,
lowest_start, highest_start);
reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
(char*)aligned_heap_base_min_address, (char*)zerobased_max, size, alignment, page_size);
}
// Now we go for heaps with base != 0. We need a noaccess prefix to efficiently
@ -616,17 +619,17 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
// Try to attach at addresses that are aligned to OopEncodingHeapMax. Disjointbase mode.
char** addresses = get_attach_addresses_for_disjoint_mode();
int i = 0;
while ((addresses[i] != nullptr) && // End of array not yet reached.
((!reserved.is_reserved()) || // No previous try succeeded.
(reserved.end() > zerobased_max && // Not zerobased or unscaled address.
// Not disjoint address.
while ((addresses[i] != nullptr) && // End of array not yet reached.
((!reserved.is_reserved()) || // No previous try succeeded.
(reserved.end() > (char*)zerobased_max && // Not zerobased or unscaled address.
// Not disjoint address.
!CompressedOops::is_disjoint_heap_base_address((address)reserved.base())))) {
// Release previous reservation
release(reserved);
char* const attach_point = addresses[i];
assert(attach_point >= aligned_heap_base_min_address, "Flag support broken");
assert((uintptr_t)attach_point >= aligned_heap_base_min_address, "Flag support broken");
reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, attach_point);
i++;
}

View File

@ -326,7 +326,7 @@ bool RegionNode::is_unreachable_region(const PhaseGVN* phase) {
// First, cut the simple case of fallthrough region when NONE of
// region's phis references itself directly or through a data node.
if (is_possible_unsafe_loop(phase)) {
if (is_possible_unsafe_loop()) {
// If we have a possible unsafe loop, check if the region node is actually unreachable from root.
if (is_unreachable_from_root(phase)) {
_is_unreachable_region = true;
@ -336,7 +336,7 @@ bool RegionNode::is_unreachable_region(const PhaseGVN* phase) {
return false;
}
bool RegionNode::is_possible_unsafe_loop(const PhaseGVN* phase) const {
bool RegionNode::is_possible_unsafe_loop() const {
uint max = outcnt();
uint i;
for (i = 0; i < max; i++) {
@ -634,8 +634,8 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
} else if (can_reshape && cnt == 1) {
// Is it dead loop?
// If it is LoopNopde it had 2 (+1 itself) inputs and
// one of them was cut. The loop is dead if it was EntryContol.
// If it is LoopNode it had 2 (+1 itself) inputs and
// one of them was cut. The loop is dead if it was EntryControl.
// Loop node may have only one input because entry path
// is removed in PhaseIdealLoop::Dominators().
assert(!this->is_Loop() || cnt_orig <= 3, "Loop node should have 3 or less inputs");
@ -1392,7 +1392,7 @@ bool PhiNode::try_clean_memory_phi(PhaseIterGVN* igvn) {
}
assert(is_diamond_phi() > 0, "sanity");
assert(req() == 3, "same as region");
const Node* region = in(0);
RegionNode* region = in(0)->as_Region();
for (uint i = 1; i < 3; i++) {
Node* phi_input = in(i);
if (phi_input != nullptr && phi_input->is_MergeMem() && region->in(i)->outcnt() == 1) {
@ -1400,8 +1400,9 @@ bool PhiNode::try_clean_memory_phi(PhaseIterGVN* igvn) {
MergeMemNode* merge_mem = phi_input->as_MergeMem();
uint j = 3 - i;
Node* other_phi_input = in(j);
if (other_phi_input != nullptr && other_phi_input == merge_mem->base_memory()) {
if (other_phi_input != nullptr && other_phi_input == merge_mem->base_memory() && !is_data_loop(region, phi_input, igvn)) {
// merge_mem is a successor memory to other_phi_input, and is not pinned inside the diamond, so push it out.
// Only proceed if the transformation doesn't create a data loop.
// This will allow the diamond to collapse completely if there are no other phis left.
igvn->replace_node(this, merge_mem);
return true;

View File

@ -84,7 +84,7 @@ private:
bool _is_unreachable_region;
LoopStatus _loop_status;
bool is_possible_unsafe_loop(const PhaseGVN* phase) const;
bool is_possible_unsafe_loop() const;
bool is_unreachable_from_root(const PhaseGVN* phase) const;
public:
// Node layout (parallels PhiNode):

View File

@ -1471,6 +1471,65 @@ static OptoReg::Name find_first_set(LRG& lrg, RegMask& mask) {
return assigned;
}
OptoReg::Name PhaseChaitin::select_bias_lrg_color(LRG& lrg) {
uint bias_lrg1_idx = _lrg_map.find(lrg._copy_bias);
uint bias_lrg2_idx = _lrg_map.find(lrg._copy_bias2);
// If bias_lrg1 has a color
if (bias_lrg1_idx != 0 && !_ifg->_yanked->test(bias_lrg1_idx)) {
OptoReg::Name reg = lrgs(bias_lrg1_idx).reg();
// and it is legal for lrg
if (is_legal_reg(lrg, reg)) {
return reg;
}
}
// If bias_lrg2 has a color
if (bias_lrg2_idx != 0 && !_ifg->_yanked->test(bias_lrg2_idx)) {
OptoReg::Name reg = lrgs(bias_lrg2_idx).reg();
// and it is legal for lrg
if (is_legal_reg(lrg, reg)) {
return reg;
}
}
uint bias_lrg_idx = 0;
if (bias_lrg1_idx != 0 && bias_lrg2_idx != 0) {
// Since none of the bias live ranges are part of the IFG yet, constrain the
// definition mask with the bias live range with the least degrees of
// freedom. This will increase the chances of register sharing once the bias
// live range becomes part of the IFG.
lrgs(bias_lrg1_idx).compute_set_mask_size();
lrgs(bias_lrg2_idx).compute_set_mask_size();
bias_lrg_idx = lrgs(bias_lrg1_idx).degrees_of_freedom() >
lrgs(bias_lrg2_idx).degrees_of_freedom()
? bias_lrg2_idx
: bias_lrg1_idx;
} else if (bias_lrg1_idx != 0) {
bias_lrg_idx = bias_lrg1_idx;
} else if (bias_lrg2_idx != 0) {
bias_lrg_idx = bias_lrg2_idx;
}
// Register masks with an offset exclude all mask bits before the offset.
// Such masks are mainly used for allocation from stack slots. Constrain the
// register mask of the definition live range with the bias mask only if
// both masks have zero offset.
if (bias_lrg_idx != 0 && !lrg.mask().is_offset() &&
!lrgs(bias_lrg_idx).mask().is_offset()) {
// Choose a color which is legal for bias_lrg
ResourceMark rm(C->regmask_arena());
RegMask tempmask(lrg.mask(), C->regmask_arena());
tempmask.and_with(lrgs(bias_lrg_idx).mask());
tempmask.clear_to_sets(lrg.num_regs());
OptoReg::Name reg = find_first_set(lrg, tempmask);
if (OptoReg::is_valid(reg)) {
return reg;
}
}
return OptoReg::Bad;
}
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
@ -1492,25 +1551,10 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
}
}
uint copy_lrg = _lrg_map.find(lrg._copy_bias);
if (copy_lrg != 0) {
// If he has a color,
if(!_ifg->_yanked->test(copy_lrg)) {
OptoReg::Name reg = lrgs(copy_lrg).reg();
// And it is legal for you,
if (is_legal_reg(lrg, reg)) {
return reg;
}
} else if (!lrg.mask().is_offset()) {
// Choose a color which is legal for him
ResourceMark rm(C->regmask_arena());
RegMask tempmask(lrg.mask(), C->regmask_arena());
tempmask.and_with(lrgs(copy_lrg).mask());
tempmask.clear_to_sets(lrg.num_regs());
OptoReg::Name reg = find_first_set(lrg, tempmask);
if (OptoReg::is_valid(reg))
return reg;
}
// Try biasing the color with non-interfering bias live range[s].
OptoReg::Name reg = select_bias_lrg_color(lrg);
if (OptoReg::is_valid(reg)) {
return reg;
}
// If no bias info exists, just go with the register selection ordering
@ -1524,7 +1568,7 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
// copy removal to remove many more copies, by preventing a just-assigned
// register from being repeatedly assigned.
OptoReg::Name reg = lrg.mask().find_first_elem();
reg = lrg.mask().find_first_elem();
if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
// This 'Remove; find; Insert' idiom is an expensive way to find the
// SECOND element in the mask.
@ -1640,6 +1684,27 @@ uint PhaseChaitin::Select( ) {
}
}
}
Node* def = lrg->_def;
if (lrg->is_singledef() && !lrg->_is_bound && def->is_Mach()) {
MachNode* mdef = def->as_Mach();
if (Matcher::is_register_biasing_candidate(mdef, 1)) {
Node* in1 = mdef->in(mdef->operand_index(1));
if (in1 != nullptr && lrg->_copy_bias == 0) {
lrg->_copy_bias = _lrg_map.find(in1);
}
}
// For commutative operations, def allocation can also be
// biased towards the LRG of the second input's def.
if (Matcher::is_register_biasing_candidate(mdef, 2)) {
Node* in2 = mdef->in(mdef->operand_index(2));
if (in2 != nullptr && lrg->_copy_bias2 == 0) {
lrg->_copy_bias2 = _lrg_map.find(in2);
}
}
}
//assert(is_infinite_stack == lrg->mask().is_infinite_stack(), "nbrs must not change InfiniteStackedness");
// Aligned pairs need aligned masks
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
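
Editor's note: the new select_bias_lrg_color above constrains the definition's register mask with the bias live range's mask and takes the first legal register. As a rough analogy only (RegMask is not a plain bitset, and the degrees-of-freedom tie-break and offset handling are simplified away), the mask-intersection step looks like this in Java, with BitSet standing in for RegMask and all names made up for illustration:

    import java.util.BitSet;

    public class BiasColorSketch {
        /**
         * Intersect the definition's allowed registers with the bias live
         * range's allowed registers and return the first register in the
         * intersection, or -1 if the masks do not overlap.
         */
        static int selectBiasedColor(BitSet defMask, BitSet biasMask) {
            BitSet temp = (BitSet) defMask.clone(); // do not clobber the original mask
            temp.and(biasMask);                     // tempmask.and_with(bias mask) analogue
            return temp.nextSetBit(0);              // find_first_set analogue
        }

        public static void main(String[] args) {
            BitSet def = new BitSet();
            def.set(0, 8);        // definition may live in r0..r7
            BitSet bias = new BitSet();
            bias.set(5, 12);      // bias live range prefers r5..r11
            System.out.println("biased color: r" + selectBiasedColor(def, bias)); // prints r5
        }
    }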

View File

@ -63,6 +63,7 @@ public:
uint _risk_bias; // Index of LRG which we want to avoid color
uint _copy_bias; // Index of LRG which we want to share color
uint _copy_bias2; // Index of second LRG which we want to share color
uint _next; // Index of next LRG in linked list
uint _prev; // Index of prev LRG in linked list
@ -703,6 +704,8 @@ private:
OptoReg::Name choose_color(LRG& lrg);
// Helper function which implements biasing heuristic
OptoReg::Name bias_color(LRG& lrg);
// Helper function which implements color biasing
OptoReg::Name select_bias_lrg_color(LRG& lrg);
// Split uncolorable live ranges
// Return new number of live ranges

View File

@ -35,6 +35,97 @@
#ifndef PRODUCT
// Support for printing properties
class PrintProperties
{
private:
IdealGraphPrinter* _printer;
public:
PrintProperties(IdealGraphPrinter* printer) : _printer(printer) {}
void print_node_properties(Node* node);
void print_lrg_properties(const LRG& lrg, const char* buffer);
void print_property(int flag, const char* name);
void print_property(int flag, const char* name, const char* val);
void print_property(int flag, const char* name, int val);
};
void PrintProperties::print_node_properties(Node* node) {
const jushort flags = node->flags();
print_property((flags & Node::Flag_is_Copy), "is_copy");
print_property((flags & Node::Flag_rematerialize), "rematerialize");
print_property((flags & Node::Flag_needs_anti_dependence_check), "needs_anti_dependence_check");
print_property((flags & Node::Flag_is_macro), "is_macro");
print_property((flags & Node::Flag_is_Con), "is_con");
print_property((flags & Node::Flag_is_cisc_alternate), "is_cisc_alternate");
print_property((flags & Node::Flag_is_dead_loop_safe), "is_dead_loop_safe");
print_property((flags & Node::Flag_may_be_short_branch), "may_be_short_branch");
print_property((flags & Node::Flag_has_call), "has_call");
print_property((flags & Node::Flag_has_swapped_edges), "has_swapped_edges");
Matcher* matcher = _printer->C->matcher();
if (matcher != nullptr) {
print_property(matcher->is_shared(node), "is_shared");
print_property(!(matcher->is_shared(node)), "is_shared", IdealGraphPrinter::FALSE_VALUE);
print_property(matcher->is_dontcare(node), "is_dontcare");
print_property(!(matcher->is_dontcare(node)), "is_dontcare", IdealGraphPrinter::FALSE_VALUE);
Node* old = matcher->find_old_node(node);
if (old != nullptr) {
print_property(true, "old_node_idx", old->_idx);
}
}
}
void PrintProperties::print_lrg_properties(const LRG &lrg, const char *buffer) {
print_property(true, "mask", buffer);
print_property(true, "mask_size", lrg.mask_size());
if (lrg._degree_valid) {
print_property(true, "degree", lrg.degree());
}
print_property(true, "num_regs", lrg.num_regs());
print_property(true, "reg_pressure", lrg.reg_pressure());
print_property(true, "cost", lrg._cost);
print_property(true, "area", lrg._area);
print_property(true, "score", lrg.score());
print_property((lrg._risk_bias != 0), "risk_bias", lrg._risk_bias);
print_property((lrg._copy_bias != 0), "copy_bias", lrg._copy_bias);
print_property((lrg._copy_bias2 != 0), "copy_bias2", lrg._copy_bias2);
print_property(lrg.is_singledef(), "is_singledef");
print_property(lrg.is_multidef(), "is_multidef");
print_property(lrg._is_oop, "is_oop");
print_property(lrg._is_float, "is_float");
print_property(lrg._is_vector, "is_vector");
print_property(lrg._is_predicate, "is_predicate");
print_property(lrg._is_scalable, "is_scalable");
print_property(lrg._was_spilled1, "was_spilled1");
print_property(lrg._was_spilled2, "was_spilled2");
print_property(lrg._direct_conflict, "direct_conflict");
print_property(lrg._fat_proj, "fat_proj");
print_property(lrg._was_lo, "_was_lo");
print_property(lrg._has_copy, "has_copy");
print_property(lrg._at_risk, "at_risk");
print_property(lrg._must_spill, "must_spill");
print_property(lrg._is_bound, "is_bound");
print_property((lrg._msize_valid && lrg._degree_valid && lrg.lo_degree()), "trivial");
}
void PrintProperties::print_property(int flag, const char* name) {
if (flag != 0) {
_printer->print_prop(name, IdealGraphPrinter::TRUE_VALUE);
}
}
void PrintProperties::print_property(int flag, const char* name, const char* val) {
if (flag != 0) {
_printer->print_prop(name, val);
}
}
void PrintProperties::print_property(int flag, const char* name, int val) {
if (flag != 0) {
_printer->print_prop(name, val);
}
}
// Constants
// Keep consistent with Java constants
const char *IdealGraphPrinter::INDENT = " ";
@ -522,54 +613,8 @@ void IdealGraphPrinter::visit_node(Node* n, bool edges) {
print_prop("jvms", buffer);
}
const jushort flags = node->flags();
if (flags & Node::Flag_is_Copy) {
print_prop("is_copy", "true");
}
if (flags & Node::Flag_rematerialize) {
print_prop("rematerialize", "true");
}
if (flags & Node::Flag_needs_anti_dependence_check) {
print_prop("needs_anti_dependence_check", "true");
}
if (flags & Node::Flag_is_macro) {
print_prop("is_macro", "true");
}
if (flags & Node::Flag_is_Con) {
print_prop("is_con", "true");
}
if (flags & Node::Flag_is_cisc_alternate) {
print_prop("is_cisc_alternate", "true");
}
if (flags & Node::Flag_is_dead_loop_safe) {
print_prop("is_dead_loop_safe", "true");
}
if (flags & Node::Flag_may_be_short_branch) {
print_prop("may_be_short_branch", "true");
}
if (flags & Node::Flag_has_call) {
print_prop("has_call", "true");
}
if (flags & Node::Flag_has_swapped_edges) {
print_prop("has_swapped_edges", "true");
}
if (C->matcher() != nullptr) {
if (C->matcher()->is_shared(node)) {
print_prop("is_shared", "true");
} else {
print_prop("is_shared", "false");
}
if (C->matcher()->is_dontcare(node)) {
print_prop("is_dontcare", "true");
} else {
print_prop("is_dontcare", "false");
}
Node* old = C->matcher()->find_old_node(node);
if (old != nullptr) {
print_prop("old_node_idx", old->_idx);
}
}
PrintProperties print_node(this);
print_node.print_node_properties(node);
if (node->is_Proj()) {
print_prop("con", (int)node->as_Proj()->_con);
@ -1145,73 +1190,10 @@ void IdealGraphPrinter::print(const char* name, Node* node, GrowableArray<const
buffer[0] = 0;
stringStream lrg_mask_stream(buffer, sizeof(buffer) - 1);
lrg.mask().dump(&lrg_mask_stream);
print_prop("mask", buffer);
print_prop("mask_size", lrg.mask_size());
if (lrg._degree_valid) {
print_prop("degree", lrg.degree());
}
print_prop("num_regs", lrg.num_regs());
print_prop("reg_pressure", lrg.reg_pressure());
print_prop("cost", lrg._cost);
print_prop("area", lrg._area);
print_prop("score", lrg.score());
if (lrg._risk_bias != 0) {
print_prop("risk_bias", lrg._risk_bias);
}
if (lrg._copy_bias != 0) {
print_prop("copy_bias", lrg._copy_bias);
}
if (lrg.is_singledef()) {
print_prop("is_singledef", TRUE_VALUE);
}
if (lrg.is_multidef()) {
print_prop("is_multidef", TRUE_VALUE);
}
if (lrg._is_oop) {
print_prop("is_oop", TRUE_VALUE);
}
if (lrg._is_float) {
print_prop("is_float", TRUE_VALUE);
}
if (lrg._is_vector) {
print_prop("is_vector", TRUE_VALUE);
}
if (lrg._is_predicate) {
print_prop("is_predicate", TRUE_VALUE);
}
if (lrg._is_scalable) {
print_prop("is_scalable", TRUE_VALUE);
}
if (lrg._was_spilled1) {
print_prop("was_spilled1", TRUE_VALUE);
}
if (lrg._was_spilled2) {
print_prop("was_spilled2", TRUE_VALUE);
}
if (lrg._direct_conflict) {
print_prop("direct_conflict", TRUE_VALUE);
}
if (lrg._fat_proj) {
print_prop("fat_proj", TRUE_VALUE);
}
if (lrg._was_lo) {
print_prop("_was_lo", TRUE_VALUE);
}
if (lrg._has_copy) {
print_prop("has_copy", TRUE_VALUE);
}
if (lrg._at_risk) {
print_prop("at_risk", TRUE_VALUE);
}
if (lrg._must_spill) {
print_prop("must_spill", TRUE_VALUE);
}
if (lrg._is_bound) {
print_prop("is_bound", TRUE_VALUE);
}
if (lrg._msize_valid && lrg._degree_valid && lrg.lo_degree()) {
print_prop("trivial", TRUE_VALUE);
}
PrintProperties print_node(this);
print_node.print_lrg_properties(lrg, buffer);
tail(PROPERTIES_ELEMENT);
tail(LIVE_RANGE_ELEMENT);
}

View File

@ -46,8 +46,9 @@ class ConnectionGraph;
class Parse;
class IdealGraphPrinter : public CHeapObj<mtCompiler> {
private:
friend class PrintProperties;
private:
static const char *INDENT;
static const char *TOP_ELEMENT;
static const char *GROUP_ELEMENT;

View File

@ -460,6 +460,13 @@ int MachNode::operand_index(Node* def) const {
return -1;
}
int MachNode::operand_num_edges(uint oper_index) const {
if (num_opnds() > oper_index) {
return _opnds[oper_index]->num_edges();
}
return 0;
}
//------------------------------peephole---------------------------------------
// Apply peephole rule(s) to this instruction
int MachNode::peephole(Block *block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc *ra_) {

View File

@ -266,6 +266,7 @@ public:
int operand_index(uint operand) const;
int operand_index(const MachOper *oper) const;
int operand_index(Node* m) const;
int operand_num_edges(uint operand) const;
// Register class input is expected in
virtual const RegMask &in_RegMask(uint) const;

View File

@ -512,6 +512,8 @@ public:
DEBUG_ONLY( bool verify_after_postselect_cleanup(); )
public:
static bool is_register_biasing_candidate(const MachNode* mdef, int oper_index);
// This routine is run whenever a graph fails to match.
// If it returns, the compiler should bailout to interpreter without error.
// In non-product mode, SoftMatchFailure is false to detect non-canonical

View File

@ -828,26 +828,26 @@ public:
#undef DEFINE_CLASS_ID
// Flags are sorted by usage frequency.
enum NodeFlags {
Flag_is_Copy = 1 << 0, // should be first bit to avoid shift
Flag_rematerialize = 1 << 1,
Flag_needs_anti_dependence_check = 1 << 2,
Flag_is_macro = 1 << 3,
Flag_is_Con = 1 << 4,
Flag_is_cisc_alternate = 1 << 5,
Flag_is_dead_loop_safe = 1 << 6,
Flag_may_be_short_branch = 1 << 7,
Flag_avoid_back_to_back_before = 1 << 8,
Flag_avoid_back_to_back_after = 1 << 9,
Flag_has_call = 1 << 10,
Flag_has_swapped_edges = 1 << 11,
Flag_is_scheduled = 1 << 12,
Flag_is_expensive = 1 << 13,
Flag_is_predicated_vector = 1 << 14,
Flag_for_post_loop_opts_igvn = 1 << 15,
Flag_for_merge_stores_igvn = 1 << 16,
Flag_is_removed_by_peephole = 1 << 17,
Flag_is_predicated_using_blend = 1 << 18,
enum NodeFlags : uint64_t {
Flag_is_Copy = 1ULL << 0, // should be first bit to avoid shift
Flag_rematerialize = 1ULL << 1,
Flag_needs_anti_dependence_check = 1ULL << 2,
Flag_is_macro = 1ULL << 3,
Flag_is_Con = 1ULL << 4,
Flag_is_cisc_alternate = 1ULL << 5,
Flag_is_dead_loop_safe = 1ULL << 6,
Flag_may_be_short_branch = 1ULL << 7,
Flag_avoid_back_to_back_before = 1ULL << 8,
Flag_avoid_back_to_back_after = 1ULL << 9,
Flag_has_call = 1ULL << 10,
Flag_has_swapped_edges = 1ULL << 11,
Flag_is_scheduled = 1ULL << 12,
Flag_is_expensive = 1ULL << 13,
Flag_is_predicated_vector = 1ULL << 14,
Flag_for_post_loop_opts_igvn = 1ULL << 15,
Flag_for_merge_stores_igvn = 1ULL << 16,
Flag_is_removed_by_peephole = 1ULL << 17,
Flag_is_predicated_using_blend = 1ULL << 18,
_last_flag = Flag_is_predicated_using_blend
};

View File

@ -1022,27 +1022,39 @@ bool VPointer::can_make_speculative_aliasing_check_with(const VPointer& other) c
// or at the multiversion_if. That is before the pre-loop. From the construction of
// VPointer, we already know that all its variables (except iv) are pre-loop invariant.
//
// For the computation of main_init, we also need the pre_limit, and so we need
// to check that this value is pre-loop invariant. In the case of non-equal iv_scales,
// we also need the main_limit in the aliasing check, and so this value must then
// also be pre-loop invariant.
// In VPointer::make_speculative_aliasing_check_with we compute main_init in all
// cases. For this, we require pre_init and pre_limit. These values must be available
// for the speculative check, i.e. their control must dominate the speculative check.
// Further, "if vp1.iv_scale() != vp2.iv_scale()" we additionally need to have
// main_limit available for the speculative check.
// Note: no matter if the speculative check is inserted as a predicate or at the
// multiversion if, the speculative check happens before (dominates) the
// pre-loop.
Node* pre_init = _vloop.pre_loop_end()->init_trip();
Opaque1Node* pre_limit_opaq = _vloop.pre_loop_end()->limit()->as_Opaque1();
Node* pre_limit = pre_limit_opaq->in(1);
Node* main_limit = _vloop.cl()->limit();
if (!_vloop.is_pre_loop_invariant(pre_limit)) {
if (!_vloop.is_available_for_speculative_check(pre_init)) {
#ifdef ASSERT
if (_vloop.is_trace_speculative_aliasing_analysis()) {
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not pre-loop independent!");
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not available at speculative check!");
}
#endif
return false;
}
if (!_vloop.is_available_for_speculative_check(pre_limit)) {
#ifdef ASSERT
if (_vloop.is_trace_speculative_aliasing_analysis()) {
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not available at speculative check!");
}
#endif
return false;
}
if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_pre_loop_invariant(main_limit)) {
if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_available_for_speculative_check(main_limit)) {
#ifdef ASSERT
if (_vloop.is_trace_speculative_aliasing_analysis()) {
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not pre-loop independent!");
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not available at speculative check!");
}
#endif
return false;
@ -1119,6 +1131,8 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
Node* pre_limit = pre_limit_opaq->in(1);
assert(_vloop.is_pre_loop_invariant(pre_init), "needed for aliasing check before pre-loop");
assert(_vloop.is_pre_loop_invariant(pre_limit), "needed for aliasing check before pre-loop");
assert(_vloop.is_available_for_speculative_check(pre_init), "ctrl must be early enough to avoid cycles");
assert(_vloop.is_available_for_speculative_check(pre_limit), "ctrl must be early enough to avoid cycles");
Node* pre_initL = new ConvI2LNode(pre_init);
Node* pre_limitL = new ConvI2LNode(pre_limit);
@ -1180,6 +1194,7 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
jint main_iv_stride = _vloop.iv_stride();
Node* main_limit = _vloop.cl()->limit();
assert(_vloop.is_pre_loop_invariant(main_limit), "needed for aliasing check before pre-loop");
assert(_vloop.is_available_for_speculative_check(main_limit), "ctrl must be early enough to avoid cycles");
Node* main_limitL = new ConvI2LNode(main_limit);
phase->register_new_node_with_ctrl_of(main_limitL, pre_init);

View File

@ -236,6 +236,8 @@ public:
// Some nodes must be pre-loop invariant, so that they can be used for conditions
// before or inside the pre-loop. For example, alignment of main-loop vector
// memops must be achieved in the pre-loop, via the exit check in the pre-loop.
// Note: this condition is NOT strong enough for speculative checks; those happen
// before the pre-loop. See is_available_for_speculative_check.
bool is_pre_loop_invariant(Node* n) const {
// Must be in the main-loop, otherwise we can't access the pre-loop.
// This fails during SuperWord::unrolling_analysis, but that is ok.
@ -257,6 +259,28 @@ public:
return is_before_pre_loop(early);
}
// Nodes that are to be used in speculative checks must be available early enough.
// Note: the speculative check happens before the pre-loop, either at the auto
// vectorization predicate or at the multiversion if. The condition here is
// therefore stronger than the one from is_pre_loop_invariant.
bool is_available_for_speculative_check(Node* n) const {
assert(are_speculative_checks_possible(), "meaningless without speculative check");
ParsePredicateSuccessProj* parse_predicate_proj = auto_vectorization_parse_predicate_proj();
// Find the control of the predicate:
ProjNode* proj = (parse_predicate_proj != nullptr) ? parse_predicate_proj : multiversioning_fast_proj();
Node* check_ctrl = proj->in(0)->as_If()->in(0);
// Often, the control of n already dominates that of the predicate.
Node* n_ctrl = phase()->get_ctrl(n);
if (phase()->is_dominator(n_ctrl, check_ctrl)) { return true; }
// But in some cases, the ctrl of n is after that of the predicate,
// but the early ctrl is before the predicate.
Node* n_early = phase()->compute_early_ctrl(n, n_ctrl);
return phase()->is_dominator(n_early, check_ctrl);
}
// Check if the loop passes some basic preconditions for vectorization.
// Return indicates if analysis succeeded.
bool check_preconditions();

View File

@ -40,38 +40,76 @@ void VTransformGraph::add_vtnode(VTransformNode* vtnode) {
} \
)
// This is similar to IGVN optimization. But we are a bit lazy, and don't care about
// notification / worklist, since the list of nodes is rather small, and we don't
// expect optimizations that trickle over the whole graph.
void VTransformGraph::optimize(VTransform& vtransform) {
TRACE_OPTIMIZE( tty->print_cr("\nVTransformGraph::optimize"); )
bool progress = true;
DEBUG_ONLY(int pass_count = 0;)
while (progress) {
progress = false;
assert(++pass_count < 10, "ensure we do not have endless loops");
for (int i = 0; i < _vtnodes.length(); i++) {
VTransformNode* vtn = _vtnodes.at(i);
if (!vtn->is_alive()) { continue; }
progress |= vtn->optimize(_vloop_analyzer, vtransform);
// Nodes that have no use any more are dead.
if (vtn->out_strong_edges() == 0 &&
// There are some exceptions:
// 1. Memory phi uses are not modeled, so they appear to have no use here, but must be kept alive.
// 2. Similarly, some stores may not have their memory uses modeled, but need to be kept alive.
// 3. Outer node with strong inputs: is a use after the loop that we must keep alive.
!(vtn->isa_PhiScalar() != nullptr ||
vtn->is_load_or_store_in_loop() ||
(vtn->isa_Outer() != nullptr && vtn->has_strong_in_edge()))) {
vtn->mark_dead();
progress = true;
}
}
void VTransformOptimize::worklist_push(VTransformNode* vtn) {
if (!_worklist_set.test_set(vtn->_idx)) {
_worklist.push(vtn);
}
}
VTransformNode* VTransformOptimize::worklist_pop() {
VTransformNode* vtn = _worklist.pop();
_worklist_set.remove(vtn->_idx);
return vtn;
}
void VTransform::optimize() {
NOT_PRODUCT( if (vloop().is_trace_optimization()) { tty->print_cr("\nVTransform::optimize"); } )
ResourceMark rm;
VTransformOptimize vtoptimize(_vloop_analyzer, *this);
vtoptimize.optimize();
}
void VTransformOptimize::optimize() {
// Initialize: push all nodes to worklist.
for (int i = 0; i < _vtransform.graph().vtnodes().length(); i++) {
VTransformNode* vtn = _vtransform.graph().vtnodes().at(i);
worklist_push(vtn);
}
// We don't want to iterate too many times. We set some arbitrary limit,
// just to catch infinite loops.
DEBUG_ONLY( int allowed_steps = 100 * _worklist.length(); )
// Optimize iteratively.
while (_worklist.is_nonempty()) {
VTransformNode* vtn = worklist_pop();
optimize_step(vtn);
assert(--allowed_steps > 0, "no endless loop");
}
DEBUG_ONLY( verify(); )
}
#ifdef ASSERT
void VTransformOptimize::verify() {
for (int i = 0; i < _vtransform.graph().vtnodes().length(); i++) {
VTransformNode* vtn = _vtransform.graph().vtnodes().at(i);
assert(!optimize_step(vtn), "Missed optimization during VTransform::optimize for %s", vtn->name());
assert(_worklist.is_empty(), "vtnode on worklist despite no progress for %s", vtn->name());
}
}
#endif
// Return true if (and only if) we made progress.
bool VTransformOptimize::optimize_step(VTransformNode* vtn) {
if (!vtn->is_alive()) { return false; }
bool progress = vtn->optimize(*this);
// Nodes that have no use any more are dead.
if (vtn->out_strong_edges() == 0 &&
// There are some exceptions:
// 1. Memory phi uses are not modeled, so they appear to have no use here, but must be kept alive.
// 2. Similarly, some stores may not have their memory uses modeled, but need to be kept alive.
// 3. Outer node with strong inputs: is a use after the loop that we must keep alive.
!(vtn->isa_PhiScalar() != nullptr ||
vtn->is_load_or_store_in_loop() ||
(vtn->isa_Outer() != nullptr && vtn->has_strong_in_edge()))) {
vtn->mark_dead(*this);
return true;
}
return progress;
}
// Compute a linearization of the graph. We do this with a reverse-post-order of a DFS.
// This only works if the graph is a directed acyclic graph (DAG). The C2 graph, and
// the VLoopDependencyGraph are both DAGs, but after introduction of vectors/packs, the
@ -1141,8 +1179,8 @@ VTransformApplyResult VTransformBoolVectorNode::apply(VTransformApplyState& appl
return VTransformApplyResult::make_vector(vn);
}
bool VTransformReductionVectorNode::optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) {
return optimize_move_non_strict_order_reductions_out_of_loop(vloop_analyzer, vtransform);
bool VTransformReductionVectorNode::optimize(VTransformOptimize& vtoptimize) {
return optimize_move_non_strict_order_reductions_out_of_loop(vtoptimize);
}
int VTransformReductionVectorNode::vector_reduction_opcode() const {
@ -1213,7 +1251,7 @@ bool VTransformReductionVectorNode::requires_strict_order() const {
// become profitable, since the expensive reduction node is moved
// outside the loop, and instead cheaper element-wise vector accumulations
// are performed inside the loop.
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop_preconditions(VTransform& vtransform) {
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop_preconditions(const VTransform& vtransform) {
// We have a phi with a single use.
VTransformPhiScalarNode* phi = in_req(1)->isa_PhiScalar();
if (phi == nullptr) {
@ -1260,13 +1298,13 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
current_red->element_basic_type() != bt ||
current_red->vector_length() != vlen) {
TRACE_OPTIMIZE(
tty->print(" Cannot move out of loop, other reduction node does not match:");
tty->print(" Cannot move out of loop, other reduction node does not match: ");
print();
tty->print(" other: ");
if (current_red != nullptr) {
current_red->print();
} else {
tty->print("nullptr");
tty->print_cr("nullptr");
}
)
return false; // not compatible
@ -1314,7 +1352,8 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
return true; // success
}
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) {
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop(VTransformOptimize& vtoptimize) {
VTransform& vtransform = vtoptimize.vtransform();
if (!optimize_move_non_strict_order_reductions_out_of_loop_preconditions(vtransform)) {
return false;
}
@ -1328,7 +1367,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
const uint vlen = vector_length();
const BasicType bt = element_basic_type();
const int vopc = VectorNode::opcode(sopc, bt);
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
PhaseIdealLoop* phase = vtoptimize.vloop_analyzer().vloop().phase();
// Create a vector of identity values.
Node* identity = ReductionNode::make_identity_con_scalar(phase->igvn(), sopc, bt);
@ -1341,6 +1380,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
// Look at old scalar phi.
VTransformPhiScalarNode* phi_scalar = in_req(1)->isa_PhiScalar();
PhiNode* old_phi = phi_scalar->node();
vtoptimize.worklist_push(phi_scalar);
VTransformNode* init = phi_scalar->in_req(1);
TRACE_OPTIMIZE(
@ -1354,6 +1394,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
phi_vector->init_req(0, phi_scalar->in_req(0));
phi_vector->init_req(1, vtn_identity_vector);
// Note: backedge comes later
vtoptimize.worklist_push(phi_vector);
// Traverse down the chain of reductions, and replace them with vector_accumulators.
VTransformReductionVectorNode* first_red = this;
@ -1365,6 +1406,8 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
VTransformVectorNode* vector_accumulator = new (vtransform.arena()) VTransformElementWiseVectorNode(vtransform, 3, current_red->properties(), vopc);
vector_accumulator->init_req(1, current_vector_accumulator);
vector_accumulator->init_req(2, vector_input);
vtoptimize.worklist_push(current_red);
vtoptimize.worklist_push(vector_accumulator);
TRACE_OPTIMIZE(
tty->print(" replace ");
current_red->print();

View File

@ -24,6 +24,7 @@
#ifndef SHARE_OPTO_VTRANSFORM_HPP
#define SHARE_OPTO_VTRANSFORM_HPP
#include "libadt/vectset.hpp"
#include "opto/node.hpp"
#include "opto/vectorization.hpp"
#include "opto/vectornode.hpp"
@ -192,7 +193,6 @@ public:
const GrowableArray<VTransformNode*>& vtnodes() const { return _vtnodes; }
const GrowableArray<VTransformNode*>& get_schedule() const { return _schedule; }
void optimize(VTransform& vtransform);
bool schedule();
bool has_store_to_load_forwarding_failure(const VLoopAnalyzer& vloop_analyzer) const;
float cost_for_vector_loop() const;
@ -257,7 +257,7 @@ public:
DEBUG_ONLY( bool has_graph() const { return !_graph.is_empty(); } )
VTransformGraph& graph() { return _graph; }
void optimize() { return _graph.optimize(*this); }
void optimize();
bool schedule() { return _graph.schedule(); }
bool is_profitable() const;
float cost_for_vector_loop() const { return _graph.cost_for_vector_loop(); }
@ -291,6 +291,36 @@ private:
void apply_vectorization() const;
};
// Tracks the worklist during optimization. The concept is
// somewhat parallel to IGVN: we keep optimizing vtnodes on
// the worklist, which may in turn push more nodes onto the
// list, and stop once the worklist is empty.
class VTransformOptimize : public StackObj {
private:
const VLoopAnalyzer& _vloop_analyzer;
VTransform& _vtransform;
GrowableArray<VTransformNode*> _worklist;
VectorSet _worklist_set;
public:
VTransformOptimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) :
_vloop_analyzer(vloop_analyzer),
_vtransform(vtransform) {}
const VLoopAnalyzer& vloop_analyzer() const { return _vloop_analyzer; }
VTransform& vtransform() { return _vtransform; }
void worklist_push(VTransformNode* vtn);
void optimize();
private:
VTransformNode* worklist_pop();
bool optimize_step(VTransformNode* vtn);
DEBUG_ONLY( void verify(); )
};
// Keeps track of the state during "VTransform::apply"
// -> keep track of the already transformed nodes and the memory state.
class VTransformApplyState : public StackObj {
@ -531,10 +561,15 @@ public:
bool is_alive() const { return _is_alive; }
void mark_dead() {
void mark_dead(VTransformOptimize& vtoptimize) {
_is_alive = false;
// Remove all inputs
// Remove all inputs, and put them on the worklist in
// case they have become dead as well.
for (uint i = 0; i < req(); i++) {
VTransformNode* in = in_req(i);
if (in != nullptr) {
vtoptimize.worklist_push(in);
}
set_req(i, nullptr);
}
}
@ -558,7 +593,7 @@ public:
virtual const VPointer& vpointer() const { ShouldNotReachHere(); }
virtual bool is_loop_head_phi() const { return false; }
virtual bool optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) { return false; }
virtual bool optimize(VTransformOptimize& vtoptimize) { return false; }
virtual float cost(const VLoopAnalyzer& vloop_analyzer) const = 0;
@ -868,7 +903,7 @@ public:
VTransformReductionVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties) :
VTransformVectorNode(vtransform, 3, properties) {}
virtual VTransformReductionVectorNode* isa_ReductionVector() override { return this; }
virtual bool optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) override;
virtual bool optimize(VTransformOptimize& vtoptimize) override;
virtual float cost(const VLoopAnalyzer& vloop_analyzer) const override;
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "ReductionVector"; };)
@ -876,8 +911,8 @@ public:
private:
int vector_reduction_opcode() const;
bool requires_strict_order() const;
bool optimize_move_non_strict_order_reductions_out_of_loop_preconditions(VTransform& vtransform);
bool optimize_move_non_strict_order_reductions_out_of_loop(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform);
bool optimize_move_non_strict_order_reductions_out_of_loop_preconditions(const VTransform& vtransform);
bool optimize_move_non_strict_order_reductions_out_of_loop(VTransformOptimize& vtoptimize);
};
class VTransformPhiVectorNode : public VTransformVectorNode {
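
Editor's note: VTransformOptimize drives a simple fixpoint: every node starts on the worklist, worklist_push deduplicates through a bitset, and mark_dead re-enqueues the inputs of dead nodes. A generic Java sketch of that worklist discipline (not the HotSpot classes; the node type, id function, and optimize step are placeholders):

    import java.util.ArrayDeque;
    import java.util.BitSet;
    import java.util.function.Predicate;
    import java.util.function.ToIntFunction;

    public class WorklistSketch<N> {
        private final ArrayDeque<N> worklist = new ArrayDeque<>();
        private final BitSet onWorklist = new BitSet();
        private final ToIntFunction<N> idOf;

        WorklistSketch(ToIntFunction<N> idOf) { this.idOf = idOf; }

        void push(N node) {
            int id = idOf.applyAsInt(node);
            if (!onWorklist.get(id)) {     // test_set analogue: skip duplicates
                onWorklist.set(id);
                worklist.push(node);
            }
        }

        /** Initialize with all nodes, then run optimizeStep until the worklist drains. */
        void run(Iterable<N> allNodes, Predicate<N> optimizeStep) {
            allNodes.forEach(this::push);  // initialize: everything is on the worklist once
            while (!worklist.isEmpty()) {
                N node = worklist.pop();
                onWorklist.clear(idOf.applyAsInt(node));
                optimizeStep.test(node);   // the step may call push() for affected nodes
            }
        }
    }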

View File

@ -1478,10 +1478,10 @@ void Arguments::set_conservative_max_heap_alignment() {
// the alignments imposed by several sources: any requirements from the heap
// itself and the maximum page size we may run the VM with.
size_t heap_alignment = GCConfig::arguments()->conservative_max_heap_alignment();
_conservative_max_heap_alignment = MAX4(heap_alignment,
_conservative_max_heap_alignment = MAX3(heap_alignment,
os::vm_allocation_granularity(),
os::max_page_size(),
GCArguments::compute_heap_alignment());
os::max_page_size());
assert(is_power_of_2(_conservative_max_heap_alignment), "Expected to be a power-of-2");
}
jint Arguments::set_ergonomics_flags() {
@ -1589,8 +1589,8 @@ void Arguments::set_heap_size() {
}
if (UseCompressedOops) {
size_t heap_end = HeapBaseMinAddress + MaxHeapSize;
size_t max_coop_heap = max_heap_for_compressed_oops();
uintptr_t heap_end = HeapBaseMinAddress + MaxHeapSize;
uintptr_t max_coop_heap = max_heap_for_compressed_oops();
// Limit the heap size to the maximum possible when using compressed oops
if (heap_end < max_coop_heap) {
@ -1607,7 +1607,7 @@ void Arguments::set_heap_size() {
aot_log_info(aot)("UseCompressedOops disabled due to "
"max heap %zu > compressed oop heap %zu. "
"Please check the setting of MaxRAMPercentage %5.2f.",
reasonable_max, max_coop_heap, MaxRAMPercentage);
reasonable_max, (size_t)max_coop_heap, MaxRAMPercentage);
FLAG_SET_ERGO(UseCompressedOops, false);
} else {
reasonable_max = max_coop_heap;

View File

@ -75,6 +75,7 @@
// v.release_store(x) -> void
// v.release_store_fence(x) -> void
// v.compare_exchange(x, y [, o]) -> T
// v.exchange(x [, o]) -> T
//
// (2) All atomic types are default constructible.
//
@ -92,7 +93,6 @@
// (3) Atomic pointers and atomic integers additionally provide
//
// member functions:
// v.exchange(x [, o]) -> T
// v.add_then_fetch(i [, o]) -> T
// v.sub_then_fetch(i [, o]) -> T
// v.fetch_then_add(i [, o]) -> T
@ -102,10 +102,7 @@
// type of i must be signed, or both must be unsigned. Atomic pointers perform
// element arithmetic.
//
// (4) An atomic translated type additionally provides the exchange
// function if its associated atomic decayed type provides that function.
//
// (5) Atomic integers additionally provide
// (4) Atomic integers additionally provide
//
// member functions:
// v.and_then_fetch(x [, o]) -> T
@ -115,7 +112,7 @@
// v.fetch_then_or(x [, o]) -> T
// v.fetch_then_xor(x [, o]) -> T
//
// (6) Atomic pointers additionally provide
// (5) Atomic pointers additionally provide
//
// nested types:
// ElementType -> std::remove_pointer_t<T>
@ -127,9 +124,6 @@
// stand out a little more when used in surrounding non-atomic code. Without
// the "AtomicAccess::" qualifier, some of those names are easily overlooked.
//
// Atomic bytes don't provide exchange(). This is because that operation
// hasn't been implemented for 1 byte values. That could be changed if needed.
//
// Atomic for 2 byte integers is not supported. This is because atomic
// operations of that size have not been implemented. There haven't been
// required use-cases. Many platforms don't provide hardware support.
@ -184,15 +178,8 @@ private:
// Helper base classes, providing various parts of the APIs.
template<typename T> class CommonCore;
template<typename T> class SupportsExchange;
template<typename T> class SupportsArithmetic;
// Support conditional exchange() for atomic translated types.
template<typename T> class HasExchange;
template<typename T> class DecayedHasExchange;
template<typename Derived, typename T, bool = DecayedHasExchange<T>::value>
class TranslatedExchange;
public:
template<typename T, Category = category<T>()>
class Atomic;
@ -275,15 +262,7 @@ public:
atomic_memory_order order = memory_order_conservative) {
return AtomicAccess::cmpxchg(value_ptr(), compare_value, new_value, order);
}
};
template<typename T>
class AtomicImpl::SupportsExchange : public CommonCore<T> {
protected:
explicit SupportsExchange(T value) : CommonCore<T>(value) {}
~SupportsExchange() = default;
public:
T exchange(T new_value,
atomic_memory_order order = memory_order_conservative) {
return AtomicAccess::xchg(this->value_ptr(), new_value, order);
@ -291,7 +270,7 @@ public:
};
template<typename T>
class AtomicImpl::SupportsArithmetic : public SupportsExchange<T> {
class AtomicImpl::SupportsArithmetic : public CommonCore<T> {
// Guarding the AtomicAccess calls with constexpr checking of Offset produces
// better compile-time error messages.
template<typename Offset>
@ -311,7 +290,7 @@ class AtomicImpl::SupportsArithmetic : public SupportsExchange<T> {
}
protected:
explicit SupportsArithmetic(T value) : SupportsExchange<T>(value) {}
explicit SupportsArithmetic(T value) : CommonCore<T>(value) {}
~SupportsArithmetic() = default;
public:
@ -424,54 +403,8 @@ public:
// Atomic translated type
// Test whether Atomic<T> has exchange().
template<typename T>
class AtomicImpl::HasExchange {
template<typename Check> static void* test(decltype(&Check::exchange));
template<typename> static int test(...);
using test_type = decltype(test<Atomic<T>>(nullptr));
public:
static constexpr bool value = std::is_pointer_v<test_type>;
};
// Test whether the atomic decayed type associated with T has exchange().
template<typename T>
class AtomicImpl::DecayedHasExchange {
using Translator = PrimitiveConversions::Translate<T>;
using Decayed = typename Translator::Decayed;
// "Unit test" HasExchange<>.
static_assert(HasExchange<int>::value);
static_assert(HasExchange<int*>::value);
static_assert(!HasExchange<char>::value);
public:
static constexpr bool value = HasExchange<Decayed>::value;
};
// Base class for atomic translated type if atomic decayed type doesn't have
// exchange().
template<typename Derived, typename T, bool>
class AtomicImpl::TranslatedExchange {};
// Base class for atomic translated type if atomic decayed type does have
// exchange().
template<typename Derived, typename T>
class AtomicImpl::TranslatedExchange<Derived, T, true> {
public:
T exchange(T new_value,
atomic_memory_order order = memory_order_conservative) {
return static_cast<Derived*>(this)->exchange_impl(new_value, order);
}
};
template<typename T>
class AtomicImpl::Atomic<T, AtomicImpl::Category::Translated>
: public TranslatedExchange<Atomic<T>, T>
{
// Give TranslatedExchange<> access to exchange_impl() if needed.
friend class TranslatedExchange<Atomic<T>, T>;
class AtomicImpl::Atomic<T, AtomicImpl::Category::Translated> {
using Translator = PrimitiveConversions::Translate<T>;
using Decayed = typename Translator::Decayed;
@ -533,12 +466,7 @@ public:
order));
}
private:
// Implementation of exchange() if needed.
// Exclude when not needed, to prevent reference to non-existent function
// of atomic decayed type if someone explicitly instantiates Atomic<T>.
template<typename Dep = Decayed, ENABLE_IF(HasExchange<Dep>::value)>
T exchange_impl(T new_value, atomic_memory_order order) {
T exchange(T new_value, atomic_memory_order order = memory_order_conservative) {
return recover(_value.exchange(decay(new_value), order));
}
};

View File

@ -419,8 +419,8 @@ private:
struct XchgImpl;
// Platform-specific implementation of xchg. Support for sizes
// of 4, and sizeof(intptr_t) are required. The class is a function
// object that must be default constructable, with these requirements:
// of 1, 4, and 8 is required. The class is a function object
// that must be default constructable, with these requirements:
//
// - dest is of type T*.
// - exchange_value is of type T.
@ -635,7 +635,6 @@ inline void AtomicAccess::dec(D volatile* dest, atomic_memory_order order) {
STATIC_ASSERT(std::is_pointer<D>::value || std::is_integral<D>::value);
using I = std::conditional_t<std::is_pointer<D>::value, ptrdiff_t, D>;
// Assumes two's complement integer representation.
#pragma warning(suppress: 4146)
AtomicAccess::add(dest, I(-1), order);
}
@ -652,7 +651,6 @@ inline D AtomicAccess::sub(D volatile* dest, I sub_value, atomic_memory_order or
STATIC_ASSERT(sizeof(I) <= sizeof(AddendType));
AddendType addend = sub_value;
// Assumes two's complement integer representation.
#pragma warning(suppress: 4146) // In case AddendType is not signed.
return AtomicAccess::add(dest, -addend, order);
}

View File

@ -118,8 +118,5 @@ ThreadTotalCPUTimeClosure::~ThreadTotalCPUTimeClosure() {
}
void ThreadTotalCPUTimeClosure::do_thread(Thread* thread) {
// The default code path (fast_thread_cpu_time()) asserts that
// pthread_getcpuclockid() and clock_gettime() must return 0. Thus caller
// must ensure the thread exists and has not terminated.
_total += os::thread_cpu_time(thread);
}

View File

@ -162,7 +162,7 @@ void JVMFlag::print_on(outputStream* st, bool withComments, bool printRanges) co
// uintx ThresholdTolerance = 10 {product} {default}
// size_t TLABSize = 0 {product} {default}
// uintx SurvivorRatio = 8 {product} {default}
// double InitialRAMPercentage = 1.562500 {product} {default}
// double InitialRAMPercentage = 0.000000 {product} {default}
// ccstr CompileCommandFile = MyFile.cmd {product} {command line}
// ccstrlist CompileOnly = Method1
// CompileOnly += Method2 {product} {command line}

View File

@ -664,6 +664,7 @@ void VMError::report(outputStream* st, bool _verbose) {
BEGIN
if (MemTracker::enabled() &&
NmtVirtualMemory_lock != nullptr &&
_thread != nullptr &&
NmtVirtualMemory_lock->owned_by_self()) {
// Manually unlock to avoid reentrancy due to mallocs in detailed mode.
NmtVirtualMemory_lock->unlock();
@ -1305,7 +1306,7 @@ void VMError::report(outputStream* st, bool _verbose) {
os::print_signal_handlers(st, buf, sizeof(buf));
st->cr();
STEP_IF("Native Memory Tracking", _verbose)
STEP_IF("Native Memory Tracking", _verbose && _thread != nullptr)
MemTracker::error_report(st);
st->cr();

View File

@ -23,7 +23,6 @@
*
*/
#include "runtime/atomicAccess.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "utilities/spinYield.hpp"
@ -79,10 +78,10 @@
void GenericWaitBarrier::arm(int barrier_tag) {
assert(barrier_tag != 0, "Pre arm: Should be arming with armed value");
assert(AtomicAccess::load(&_barrier_tag) == 0,
assert(_barrier_tag.load_relaxed() == 0,
"Pre arm: Should not be already armed. Tag: %d",
AtomicAccess::load(&_barrier_tag));
AtomicAccess::release_store(&_barrier_tag, barrier_tag);
_barrier_tag.load_relaxed());
_barrier_tag.release_store(barrier_tag);
Cell &cell = tag_to_cell(barrier_tag);
cell.arm(barrier_tag);
@ -92,9 +91,9 @@ void GenericWaitBarrier::arm(int barrier_tag) {
}
void GenericWaitBarrier::disarm() {
int barrier_tag = AtomicAccess::load_acquire(&_barrier_tag);
int barrier_tag = _barrier_tag.load_acquire();
assert(barrier_tag != 0, "Pre disarm: Should be armed. Tag: %d", barrier_tag);
AtomicAccess::release_store(&_barrier_tag, 0);
_barrier_tag.release_store(0);
Cell &cell = tag_to_cell(barrier_tag);
cell.disarm(barrier_tag);
@ -121,7 +120,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
SpinYield sp;
while (true) {
state = AtomicAccess::load_acquire(&_state);
state = _state.load_acquire();
assert(decode_tag(state) == 0,
"Pre arm: Should not be armed. "
"Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@ -134,7 +133,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
// Try to swing cell to armed. This should always succeed after the check above.
int64_t new_state = encode(requested_tag, 0);
int64_t prev_state = AtomicAccess::cmpxchg(&_state, state, new_state);
int64_t prev_state = _state.compare_exchange(state, new_state);
if (prev_state != state) {
fatal("Cannot arm the wait barrier. "
"Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@ -145,14 +144,14 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
int GenericWaitBarrier::Cell::signal_if_needed(int max) {
int signals = 0;
while (true) {
int cur = AtomicAccess::load_acquire(&_outstanding_wakeups);
int cur = _outstanding_wakeups.load_acquire();
if (cur == 0) {
// All done, no more waiters.
return 0;
}
assert(cur > 0, "Sanity");
int prev = AtomicAccess::cmpxchg(&_outstanding_wakeups, cur, cur - 1);
int prev = _outstanding_wakeups.compare_exchange(cur, cur - 1);
if (prev != cur) {
// Contention, return to caller for early return or backoff.
return prev;
@ -172,7 +171,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
int32_t waiters;
while (true) {
int64_t state = AtomicAccess::load_acquire(&_state);
int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
waiters = decode_waiters(state);
@ -182,7 +181,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(0, waiters);
if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
if (_state.compare_exchange(state, new_state) == state) {
// Successfully disarmed.
break;
}
@ -191,19 +190,19 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
// Wake up waiters, if we have at least one.
// Allow other threads to assist with wakeups, if possible.
if (waiters > 0) {
AtomicAccess::release_store(&_outstanding_wakeups, waiters);
_outstanding_wakeups.release_store(waiters);
SpinYield sp;
while (signal_if_needed(INT_MAX) > 0) {
sp.wait();
}
}
assert(AtomicAccess::load(&_outstanding_wakeups) == 0, "Post disarm: Should not have outstanding wakeups");
assert(_outstanding_wakeups.load_relaxed() == 0, "Post disarm: Should not have outstanding wakeups");
}
void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
// Try to register ourselves as pending waiter.
while (true) {
int64_t state = AtomicAccess::load_acquire(&_state);
int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
if (tag != expected_tag) {
// Cell tag had changed while waiting here. This means either the cell had
@ -219,7 +218,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(tag, waiters + 1);
if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
if (_state.compare_exchange(state, new_state) == state) {
// Success! Proceed to wait.
break;
}
@ -238,7 +237,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
// Register ourselves as completed waiter before leaving.
while (true) {
int64_t state = AtomicAccess::load_acquire(&_state);
int64_t state = _state.load_acquire();
int32_t tag = decode_tag(state);
int32_t waiters = decode_waiters(state);
@ -248,7 +247,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
tag, waiters);
int64_t new_state = encode(tag, waiters - 1);
if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
if (_state.compare_exchange(state, new_state) == state) {
// Success!
break;
}
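
Editor's note: the cell state above packs a tag and a waiter count into one 64-bit word and updates it with compare_exchange loops; the diff only switches the accesses from AtomicAccess free functions to Atomic<int64_t> members. A standalone Java analogy of the same encode/CAS pattern using AtomicLong (the bit layout, class name, and method names are illustrative, not the HotSpot ones):

    import java.util.concurrent.atomic.AtomicLong;

    public class EncodedStateSketch {
        private final AtomicLong state = new AtomicLong(); // high 32 bits: tag, low 32 bits: waiters

        private static long encode(int tag, int waiters) { return ((long) tag << 32) | (waiters & 0xFFFFFFFFL); }
        private static int decodeTag(long s)     { return (int) (s >>> 32); }
        private static int decodeWaiters(long s) { return (int) s; }

        /** Register one more waiter, but only while the expected tag is still armed. */
        boolean addWaiter(int expectedTag) {
            while (true) {
                long s = state.get();
                if (decodeTag(s) != expectedTag) {
                    return false;                       // tag changed; do not wait
                }
                long next = encode(expectedTag, decodeWaiters(s) + 1);
                if (state.compareAndExchange(s, next) == s) {
                    return true;                        // CAS succeeded, we are registered
                }
                // else: lost the race; reload the state and retry
            }
        }
    }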

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "runtime/atomic.hpp"
#include "runtime/semaphore.hpp"
#include "utilities/globalDefinitions.hpp"
@ -43,10 +44,10 @@ private:
Semaphore _sem;
// Cell state, tracks the arming + waiters status
volatile int64_t _state;
Atomic<int64_t> _state;
// Wakeups to deliver for current waiters
volatile int _outstanding_wakeups;
Atomic<int> _outstanding_wakeups;
int signal_if_needed(int max);
@ -83,7 +84,7 @@ private:
// Trailing padding to protect the last cell.
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
volatile int _barrier_tag;
Atomic<int> _barrier_tag;
// Trailing padding to insulate the rest of the barrier from adjacent
// data structures. The leading padding is not needed, as cell padding

View File

@ -315,6 +315,18 @@ final class VirtualThread extends BaseVirtualThread {
}
}
/**
* Submits the given task to the given executor. If the scheduler is a
* ForkJoinPool then the task is first adapted to a ForkJoinTask.
*/
private void submit(Executor executor, Runnable task) {
if (executor instanceof ForkJoinPool pool) {
pool.submit(ForkJoinTask.adapt(task));
} else {
executor.execute(task);
}
}
/**
* Submits the runContinuation task to the scheduler. For the default scheduler,
* and calling it on a worker thread, the task will be pushed to the local queue,
@ -335,12 +347,12 @@ final class VirtualThread extends BaseVirtualThread {
if (currentThread().isVirtual()) {
Continuation.pin();
try {
scheduler.execute(runContinuation);
submit(scheduler, runContinuation);
} finally {
Continuation.unpin();
}
} else {
scheduler.execute(runContinuation);
submit(scheduler, runContinuation);
}
done = true;
} catch (RejectedExecutionException ree) {
@ -1536,4 +1548,4 @@ final class VirtualThread extends BaseVirtualThread {
unblocker.setDaemon(true);
unblocker.start();
}
}
}
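
Editor's note: the new submit helper in VirtualThread wraps the Runnable in a ForkJoinTask when the scheduler is a ForkJoinPool, so submission goes through the pool's own queues instead of the generic Executor path. A small standalone illustration of that adaptation with an ordinary ForkJoinPool (the demo class and pool here are made up for illustration; the internal virtual-thread scheduler is not used):

    import java.util.concurrent.Executor;
    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.ForkJoinTask;
    import java.util.concurrent.TimeUnit;

    public class SubmitAdaptDemo {
        // Mirrors the shape of the new helper: prefer ForkJoinPool.submit for FJ pools.
        static void submit(Executor executor, Runnable task) {
            if (executor instanceof ForkJoinPool pool) {
                pool.submit(ForkJoinTask.adapt(task));
            } else {
                executor.execute(task);
            }
        }

        public static void main(String[] args) throws InterruptedException {
            ForkJoinPool pool = new ForkJoinPool(2);
            submit(pool, () -> System.out.println("ran on " + Thread.currentThread().getName()));
            pool.shutdown();
            pool.awaitTermination(5, TimeUnit.SECONDS);
        }
    }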

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -72,7 +72,7 @@ public interface TypeVariable<D extends GenericDeclaration> extends Type, Annota
Type[] getBounds();
/**
* Returns the {@code GenericDeclaration} object representing the
* Returns a {@code GenericDeclaration} object representing the
* generic declaration declared for this type variable.
*
* @return the generic declaration declared for this type variable.
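
Editor's note: the reworded javadoc belongs to TypeVariable.getGenericDeclaration(). For reference, a tiny example that retrieves the generic declaration of a class's type variable via core reflection (the demo class name is made up; the reflection calls are standard java.lang.reflect API):

    import java.lang.reflect.GenericDeclaration;
    import java.lang.reflect.TypeVariable;
    import java.util.List;

    public class TypeVariableDemo {
        public static void main(String[] args) {
            // java.util.List<E> declares one type variable, E.
            TypeVariable<?> e = List.class.getTypeParameters()[0];
            GenericDeclaration decl = e.getGenericDeclaration();
            // Prints something like: E is declared by interface java.util.List
            System.out.println(e.getName() + " is declared by " + decl);
        }
    }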

View File

@ -203,7 +203,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* <i>(optional specific exception)</i>
* @throws IOException
@ -340,7 +340,7 @@ public final class Files {
* if an unsupported open option is specified or the array contains
* attributes that cannot be set atomically when creating the file
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* and the file is being opened for writing <i>(optional specific
* exception)</i>
@ -377,7 +377,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported open option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* and the file is being opened for writing <i>(optional specific
* exception)</i>
@ -575,10 +575,11 @@ public final class Files {
Set.of(StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE);
/**
* Creates a new and empty file, failing if the file already exists. The
* check for the existence of the file and the creation of the new file if
* it does not exist are a single operation that is atomic with respect to
* all other filesystem activities that might affect the directory.
* Creates a new and empty file, failing if {@code path} locates an existing
* file. The check for the existence of the file and the creation of the new
* file if it does not exist are a single operation that is atomic with
* respect to all other filesystem activities that might affect the
* directory.
*
* <p> The {@code attrs} parameter is optional {@link FileAttribute
* file-attributes} to set atomically when creating the file. Each attribute
@ -598,7 +599,7 @@ public final class Files {
* if the array contains an attribute that cannot be set atomically
* when creating the file
* @throws FileAlreadyExistsException
* If a file of that name already exists
* if {@code path} locates an existing file
* <i>(optional specific exception)</i>
* @throws IOException
* if an I/O error occurs or the parent directory does not exist
@ -611,7 +612,8 @@ public final class Files {
}
/**
* Creates a new directory. The check for the existence of the file and the
* Creates a new directory, failing if {@code dir} locates an existing
* file. The check for the existence of the file and the
* creation of the directory if it does not exist are a single operation
* that is atomic with respect to all other filesystem activities that might
* affect the directory. The {@link #createDirectories createDirectories}
@ -636,8 +638,8 @@ public final class Files {
* if the array contains an attribute that cannot be set atomically
* when creating the directory
* @throws FileAlreadyExistsException
* if a directory could not otherwise be created because a file of
* that name already exists <i>(optional specific exception)</i>
* if {@code dir} locates an existing file
* <i>(optional specific exception)</i>
* @throws IOException
* if an I/O error occurs or the parent directory does not exist
*/
@ -676,8 +678,8 @@ public final class Files {
* if the array contains an attribute that cannot be set atomically
* when creating the directory
* @throws FileAlreadyExistsException
* if {@code dir} exists but is not a directory <i>(optional specific
* exception)</i>
* if {@code dir} locates an existing file that is not a directory
* <i>(optional specific exception)</i>
* @throws IOException
* if an I/O error occurs
*/
@ -930,7 +932,8 @@ public final class Files {
}
/**
* Creates a symbolic link to a target <i>(optional operation)</i>.
* Creates a symbolic link to a target, failing if {@code link} locates an
* existing file <i>(optional operation)</i>.
*
* <p> The {@code target} parameter is the target of the link. It may be an
* {@link Path#isAbsolute absolute} or relative path and may not exist. When
@ -964,8 +967,8 @@ public final class Files {
* array contains an attribute that cannot be set atomically when
* creating the symbolic link
* @throws FileAlreadyExistsException
* if a file with the name already exists <i>(optional specific
* exception)</i>
* if {@code link} locates an existing file
* <i>(optional specific exception)</i>
* @throws IOException
* if an I/O error occurs
*/
@ -978,7 +981,8 @@ public final class Files {
}
/**
* Creates a new link (directory entry) for an existing file <i>(optional
* Creates a new link (directory entry) for an existing file,
* failing if {@code link} locates an existing file <i>(optional
* operation)</i>.
*
* <p> The {@code link} parameter locates the directory entry to create.
@ -1007,8 +1011,8 @@ public final class Files {
* if the implementation does not support adding an existing file
* to a directory
* @throws FileAlreadyExistsException
* if the entry could not otherwise be created because a file of
* that name already exists <i>(optional specific exception)</i>
* if {@code link} locates an existing file
* <i>(optional specific exception)</i>
* @throws IOException
* if an I/O error occurs
*/
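A brief usage sketch of the link-creation methods whose wording changes above (names are illustrative; both are optional operations, and symbolic links may require OS privileges):

import java.nio.file.Files;
import java.nio.file.Path;

public class LinkDemo {
    public static void main(String[] args) throws Exception {
        Path target = Files.createTempFile("target", ".txt");  // an existing file
        Path hard = target.resolveSibling("hard-link.txt");    // hypothetical names
        Path sym  = target.resolveSibling("sym-link.txt");
        Files.createLink(hard, target);         // FileAlreadyExistsException if hard already exists
        Files.createSymbolicLink(sym, target);  // likewise fails if sym locates an existing file
        System.out.println(Files.isSymbolicLink(sym));
    }
}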
@ -2711,7 +2715,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* <i>(optional specific exception)</i>
*
@ -2754,7 +2758,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* <i>(optional specific exception)</i>
*
@ -3161,7 +3165,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* <i>(optional specific exception)</i>
*/
@ -3222,7 +3226,7 @@ public final class Files {
* @throws UnsupportedOperationException
* if an unsupported option is specified
* @throws FileAlreadyExistsException
* If a file of that name already exists and the {@link
* If the path locates an existing file and the {@link
* StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
* <i>(optional specific exception)</i>
*/
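As a usage sketch of the CREATE_NEW behavior documented above (path and contents are hypothetical):

import java.nio.charset.StandardCharsets;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class WriteOnceDemo {
    public static void main(String[] args) throws Exception {
        Path p = Path.of("report.txt");                          // hypothetical path
        byte[] data = "done\n".getBytes(StandardCharsets.UTF_8);
        try {
            Files.write(p, data,
                        StandardOpenOption.CREATE_NEW,
                        StandardOpenOption.WRITE);               // refuses to touch an existing file
        } catch (FileAlreadyExistsException e) {
            System.out.println(p + " locates an existing file; nothing was overwritten");
        }
    }
}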

View File

@ -560,89 +560,70 @@ public class ForkJoinPool extends AbstractExecutorService
* access (which is usually needed anyway).
*
* Signalling. Signals (in signalWork) cause new or reactivated
* workers to scan for tasks. Method signalWork and its callers
* try to approximate the unattainable goal of having the right
* number of workers activated for the tasks at hand, but must err
* on the side of too many workers vs too few to avoid stalls:
* workers to scan for tasks. SignalWork is invoked in two cases:
* (1) When a task is pushed onto an empty queue, and (2) When a
* worker takes a top-level task from a queue that has additional
* tasks. Together, these suffice in O(log(#threads)) steps to
* fully activate with at least enough workers, and ideally no
* more than required. This ideal is unobtainable: Callers do not
* know whether another worker will finish its current task and
* poll for others without need of a signal (which is otherwise an
* advantage of work-stealing vs other schemes), and also must
* conservatively estimate the triggering conditions of emptiness
* or non-emptiness; all of which usually cause more activations
* than necessary (see below). (Method signalWork is also used as
* a failsafe in case of Thread failures in deregisterWorker, to
* activate or create a new worker to replace them).
*
* * If computations are purely tree structured, it suffices for
* every worker to activate another when it pushes a task into
* an empty queue, resulting in O(log(#threads)) steps to full
* activation. Emptiness must be conservatively approximated,
* which may result in unnecessary signals. Also, to reduce
* resource usages in some cases, at the expense of slower
* startup in others, activation of an idle thread is preferred
* over creating a new one, here and elsewhere.
*
* * At the other extreme, if "flat" tasks (those that do not in
* turn generate others) come in serially from only a single
* producer, each worker taking a task from a queue should
* propagate a signal if there are more tasks in that
* queue. This is equivalent to, but generally faster than,
* arranging for the stealer to take multiple tasks, re-pushing one or
* more on its own queue, and signalling (because its queue is
* empty), also resulting in logarithmic full activation
* time. If tasks do not engage in unbounded loops based on
* the actions of other workers with unknown dependencies,
* this form of propagation can be limited to one signal per
* activation (phase change). We distinguish the cases by
* further signalling only if the task is an InterruptibleTask
* (see below), which are the only supported forms of task that
* may do so.
*
* * Because we don't know about usage patterns (or most commonly,
* mixtures), we use both approaches, which present even more
* opportunities to over-signal. (Failure to distinguish these
* cases in terms of submission methods was arguably an early
* design mistake.) Note that in either of these contexts,
* signals may be (and often are) unnecessary because active
* workers continue scanning after running tasks without the
* need to be signalled (which is one reason work stealing is
* often faster than alternatives), so additional workers
* aren't needed.
*
* * For rapidly branching tasks that require full pool resources,
* oversignalling is OK, because signalWork will soon have no
* more workers to create or reactivate. But for others (mainly
* externally submitted tasks), overprovisioning may cause very
* noticeable slowdowns due to contention and resource
* wastage. We reduce impact by deactivating workers when
* queues don't have accessible tasks, but reactivating and
* rescanning if other tasks remain.
*
* * Despite these, signal contention and overhead effects still
* occur during ramp-up and ramp-down of small computations.
* Top-Level scheduling
* ====================
*
* Scanning. Method runWorker performs top-level scanning for (and
* execution of) tasks by polling a pseudo-random permutation of
* the array (by starting at a given index, and using a constant
* cyclically exhaustive stride.) It uses the same basic polling
* method as WorkQueue.poll(), but restarts with a different
* permutation on each invocation. The pseudorandom generator
* need not have high-quality statistical properties in the long
* permutation on each rescan. The pseudorandom generator need
* not have high-quality statistical properties in the long
* term. We use Marsaglia XorShifts, seeded with the Weyl sequence
* from ThreadLocalRandom probes, which are cheap and
* suffice. Each queue's polling attempts to avoid becoming stuck
* when other scanners/pollers stall. Scans do not otherwise
* explicitly take into account core affinities, loads, cache
* localities, etc. However, they do exploit temporal locality
* (which usually approximates these) by preferring to re-poll
* from the same queue after a successful poll before trying
* others, which also reduces bookkeeping, cache traffic, and
* scanning overhead. But it also reduces fairness, which is
* partially counteracted by giving up on detected interference
* (which also reduces contention when too many workers try to
* take small tasks from the same queue).
* from ThreadLocalRandom probes, which are cheap and suffice.
*
* Deactivation. When no tasks are found by a worker in runWorker,
* it tries to deactivate(), giving up (and rescanning) on "ctl"
* contention. To avoid missed signals during deactivation, the
* method rescans and reactivates if there may have been a missed
* signal during deactivation. To reduce false-alarm reactivations
* while doing so, we scan multiple times (analogously to method
* quiescent()) before trying to reactivate. Because idle workers
* are often not yet blocked (parked), we use a WorkQueue field to
* advertise that a waiter actually needs unparking upon signal.
* it invokes deactivate, which first deactivates (to an IDLE
* phase). Avoiding missed signals during deactivation requires a
* (conservative) rescan, reactivating if there may be tasks to
* poll. Because idle workers are often not yet blocked (parked),
* we use a WorkQueue field to advertise that a waiter actually
* needs unparking upon signal.
*
* When tasks are constructed as (recursive) DAGs, top-level
* scanning is usually infrequent, and doesn't encounter most
* of the following problems addressed by runWorker and awaitWork:
*
* Locality. Polls are organized into "runs", continuing until
* empty or contended, while also minimizing interference by
* postponing bookkeeping to the ends of runs. This may reduce
* fairness.
*
* Contention. When many workers try to poll few queues, they
* often collide, generating CAS failures and disrupting locality
* of workers already running their tasks. This also leads to
* stalls when tasks cannot be taken because other workers have
* not finished poll operations, which is detected by reading
* ahead in queue arrays. In both cases, workers restart scans in a
* way that approximates randomized backoff.
*
* Oversignalling. When many short top-level tasks are present in
* a small number of queues, the above signalling strategy may
* activate many more workers than needed, worsening locality and
* contention problems, while also generating more global
* contention (field ctl is CASed on every activation and
* deactivation). We filter out (both in runWorker and
* signalWork) attempted signals that are surely not needed
* because the signalled tasks are already taken.
*
* Shutdown and Quiescence
* =======================
*
* Quiescence. Workers scan looking for work, giving up when they
* don't find any, without being sure that none are available.
@ -892,9 +873,7 @@ public class ForkJoinPool extends AbstractExecutorService
* shutdown, runners are interrupted so they can cancel. Since
* external joining callers never run these tasks, they must await
* cancellation by others, which can occur along several different
* paths. The inability to rely on caller-runs may also require
* extra signalling (resulting in scanning and contention), so is
* done only conditionally in methods push and runWorker.
* paths.
*
* Across these APIs, rules for reporting exceptions for tasks
* with results accessed via join() differ from those via get(),
@ -961,9 +940,13 @@ public class ForkJoinPool extends AbstractExecutorService
* less-contended applications. To help arrange this, some
* non-reference fields are declared as "long" even when ints or
* shorts would suffice. For class WorkQueue, an
* embedded @Contended region segregates fields most heavily
* updated by owners from those most commonly read by stealers or
* other management.
* embedded @Contended isolates the very busy top index, along
* with status and bookkeeping fields written (mostly) by owners,
* that otherwise interfere with reading array and base
* fields. There are other variables commonly contributing to
* false-sharing-related performance issues (including fields of
* class Thread), but we can't do much about this except try to
* minimize access.
*
* Initial sizing and resizing of WorkQueue arrays is an even more
* delicate tradeoff because the best strategy systematically
@ -972,13 +955,11 @@ public class ForkJoinPool extends AbstractExecutorService
* direct false-sharing and indirect cases due to GC bookkeeping
* (cardmarks etc), and reduce the number of resizes, which are
* not especially fast because they require atomic transfers.
* Currently, arrays for workers are initialized to be just large
* enough to avoid resizing in most tree-structured tasks, but
* larger for external queues where both false-sharing problems
* and the need for resizing are more common. (Maintenance note:
* any changes in fields, queues, or their uses, or JVM layout
* policies, must be accompanied by re-evaluation of these
* placement and sizing decisions.)
* Currently, arrays are initialized to be just large enough to
* avoid resizing in most tree-structured tasks, but grow rapidly
* until large. (Maintenance note: any changes in fields, queues,
* or their uses, or JVM layout policies, must be accompanied by
* re-evaluation of these placement and sizing decisions.)
*
* Style notes
* ===========
@ -1061,17 +1042,11 @@ public class ForkJoinPool extends AbstractExecutorService
static final int DEFAULT_COMMON_MAX_SPARES = 256;
/**
* Initial capacity of work-stealing queue array for workers.
* Initial capacity of work-stealing queue array.
* Must be a power of two, at least 2. See above.
*/
static final int INITIAL_QUEUE_CAPACITY = 1 << 6;
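For context, the tree-structured (recursive) workload that the signalling and sizing notes above are tuned for can be sketched with the public API; the class name, fields, and threshold below are illustrative and not part of this change:

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;

public class SumTask extends RecursiveTask<Long> {
    final long[] a; final int lo, hi;            // illustrative fields
    SumTask(long[] a, int lo, int hi) { this.a = a; this.lo = lo; this.hi = hi; }
    protected Long compute() {
        if (hi - lo <= 1_000) {                  // arbitrary sequential threshold
            long s = 0; for (int i = lo; i < hi; i++) s += a[i]; return s;
        }
        int mid = (lo + hi) >>> 1;
        SumTask left = new SumTask(a, lo, mid);
        left.fork();                             // pushed onto this worker's queue; may signal
        long right = new SumTask(a, mid, hi).compute();
        return right + left.join();
    }
    public static void main(String[] args) {
        long[] data = new long[1 << 20];
        System.out.println(ForkJoinPool.commonPool().invoke(new SumTask(data, 0, data.length)));
    }
}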
/**
* Initial capacity of work-stealing queue array for external queues.
* Must be a power of two, at least 2. See above.
*/
static final int INITIAL_EXTERNAL_QUEUE_CAPACITY = 1 << 9;
// conversions among short, int, long
static final int SMASK = 0xffff; // (unsigned) short bits
static final long LMASK = 0xffffffffL; // lower 32 bits of long
@ -1211,11 +1186,11 @@ public class ForkJoinPool extends AbstractExecutorService
@jdk.internal.vm.annotation.Contended("w")
int stackPred; // pool stack (ctl) predecessor link
@jdk.internal.vm.annotation.Contended("w")
volatile int parking; // nonzero if parked in awaitWork
@jdk.internal.vm.annotation.Contended("w")
volatile int source; // source queue id (or DROPPED)
@jdk.internal.vm.annotation.Contended("w")
int nsteals; // number of steals from other queues
@jdk.internal.vm.annotation.Contended("w")
volatile int parking; // nonzero if parked in awaitWork
// Support for atomic operations
private static final Unsafe U;
@ -1248,11 +1223,11 @@ public class ForkJoinPool extends AbstractExecutorService
*/
WorkQueue(ForkJoinWorkerThread owner, int id, int cfg,
boolean clearThreadLocals) {
array = new ForkJoinTask<?>[owner == null ?
INITIAL_EXTERNAL_QUEUE_CAPACITY :
INITIAL_QUEUE_CAPACITY];
this.owner = owner;
this.config = (clearThreadLocals) ? cfg | CLEAR_TLS : cfg;
if ((this.owner = owner) == null) {
array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
phase = id | IDLE;
}
}
/**
@ -1279,27 +1254,27 @@ public class ForkJoinPool extends AbstractExecutorService
* @throws RejectedExecutionException if array could not be resized
*/
final void push(ForkJoinTask<?> task, ForkJoinPool pool, boolean internal) {
int s = top, b = base, m, cap, room; ForkJoinTask<?>[] a;
if ((a = array) != null && (cap = a.length) > 0 && // else disabled
task != null) {
int pk = task.noUserHelp() + 1; // prev slot offset
if ((room = (m = cap - 1) - (s - b)) >= 0) {
int s = top, b = base, m, cap, room; ForkJoinTask<?>[] a, na;
if ((a = array) != null && (cap = a.length) > 0) { // else disabled
int k = (m = cap - 1) & s;
if ((room = m - (s - b)) >= 0) {
top = s + 1;
long pos = slotOffset(m & s);
long pos = slotOffset(k);
if (!internal)
U.putReference(a, pos, task); // inside lock
else
U.getAndSetReference(a, pos, task); // fully fenced
if (room == 0) // resize
growArray(a, cap, s);
if (room == 0 && (na = growArray(a, cap, s)) != null)
k = ((a = na).length - 1) & s; // resize
}
if (!internal)
unlockPhase();
if (room < 0)
throw new RejectedExecutionException("Queue capacity exceeded");
if ((room == 0 || a[m & (s - pk)] == null) &&
pool != null)
pool.signalWork(); // may have appeared empty
if (pool != null &&
(room == 0 ||
U.getReferenceAcquire(a, slotOffset(m & (s - 1))) == null))
pool.signalWork(a, k); // may have appeared empty
}
}
@ -1308,11 +1283,12 @@ public class ForkJoinPool extends AbstractExecutorService
* @param a old array
* @param cap old array capacity
* @param s current top
* @return new array, or null on failure
*/
private void growArray(ForkJoinTask<?>[] a, int cap, int s) {
int newCap = cap << 1;
private ForkJoinTask<?>[] growArray(ForkJoinTask<?>[] a, int cap, int s) {
int newCap = (cap >= 1 << 16) ? cap << 1 : cap << 2;
ForkJoinTask<?>[] newArray = null;
if (a != null && a.length == cap && cap > 0 && newCap > 0) {
ForkJoinTask<?>[] newArray = null;
try {
newArray = new ForkJoinTask<?>[newCap];
} catch (OutOfMemoryError ex) {
@ -1329,34 +1305,45 @@ public class ForkJoinPool extends AbstractExecutorService
updateArray(newArray); // fully fenced
}
}
return newArray;
}
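A standalone sketch of the capacity schedule implied by the new growth rule above (quadruple while below 1 << 16, then double), assuming the initial capacity of 1 << 6 shown elsewhere in this change:

public class GrowthSchedule {
    public static void main(String[] args) {
        int cap = 1 << 6;                        // INITIAL_QUEUE_CAPACITY in this change
        for (int i = 0; i < 10; i++) {
            System.out.println(cap);
            cap = (cap >= 1 << 16) ? cap << 1 : cap << 2;  // same rule as growArray above
        }
    }
}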
/**
* Takes next task, if one exists, in order specified by mode,
* so acts as either local-pop or local-poll. Called only by owner.
* @param fifo nonzero if FIFO mode
* Takes next task, if one exists, in LIFO order.
*/
private ForkJoinTask<?> nextLocalTask(int fifo) {
private ForkJoinTask<?> localPop() {
ForkJoinTask<?> t = null;
ForkJoinTask<?>[] a = array;
int b = base, p = top, cap;
if (p - b > 0 && a != null && (cap = a.length) > 0) {
for (int m = cap - 1, s, nb;;) {
if (fifo == 0 || (nb = b + 1) == p) {
if ((t = (ForkJoinTask<?>)U.getAndSetReference(
a, slotOffset(m & (s = p - 1)), null)) != null)
updateTop(s); // else lost race for only task
break;
int s = top - 1, cap; long k; ForkJoinTask<?>[] a;
if ((a = array) != null && (cap = a.length) > 0 &&
U.getReference(a, k = slotOffset((cap - 1) & s)) != null &&
(t = (ForkJoinTask<?>)U.getAndSetReference(a, k, null)) != null)
updateTop(s);
return t;
}
/**
* Takes next task, if one exists, in FIFO order.
*/
private ForkJoinTask<?> localPoll() {
ForkJoinTask<?> t = null;
int p = top, cap; ForkJoinTask<?>[] a;
if ((a = array) != null && (cap = a.length) > 0) {
for (int b = base; p - b > 0; ) {
int nb = b + 1;
long k = slotOffset((cap - 1) & b);
if (U.getReference(a, k) == null) {
if (nb == p)
break; // else base is lagging
while (b == (b = U.getIntAcquire(this, BASE)))
Thread.onSpinWait(); // spin to reduce memory traffic
}
if ((t = (ForkJoinTask<?>)U.getAndSetReference(
a, slotOffset(m & b), null)) != null) {
else if ((t = (ForkJoinTask<?>)
U.getAndSetReference(a, k, null)) != null) {
updateBase(nb);
break;
}
while (b == (b = U.getIntAcquire(this, BASE)))
Thread.onSpinWait(); // spin to reduce memory traffic
if (p - b <= 0)
break;
else
b = base;
}
}
return t;
@ -1364,10 +1351,9 @@ public class ForkJoinPool extends AbstractExecutorService
/**
* Takes next task, if one exists, using configured mode.
* (Always internal, never called for Common pool.)
*/
final ForkJoinTask<?> nextLocalTask() {
return nextLocalTask(config & FIFO);
return (config & FIFO) == 0 ? localPop() : localPoll();
}
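The FIFO/LIFO split above is selected by the public asyncMode constructor flag; a minimal sketch (pool size is arbitrary):

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;

public class AsyncModeDemo {
    public static void main(String[] args) throws InterruptedException {
        // asyncMode = true gives FIFO local ordering (localPoll); the default gives LIFO (localPop)
        ForkJoinPool fifoPool = new ForkJoinPool(
                4, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true);
        fifoPool.execute(() -> System.out.println("ran with FIFO local queues"));
        fifoPool.shutdown();
        fifoPool.awaitTermination(10, TimeUnit.SECONDS);
    }
}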
/**
@ -1443,12 +1429,12 @@ public class ForkJoinPool extends AbstractExecutorService
// specialized execution methods
/**
* Runs the given task, as well as remaining local tasks.
* Runs the given task, as well as remaining local tasks
*/
final void topLevelExec(ForkJoinTask<?> task, int fifo) {
while (task != null) {
task.doExec();
task = nextLocalTask(fifo);
task = (fifo != 0) ? localPoll() : localPop();
}
}
@ -1578,7 +1564,7 @@ public class ForkJoinPool extends AbstractExecutorService
* Cancels all local tasks. Called only by owner.
*/
final void cancelTasks() {
for (ForkJoinTask<?> t; (t = nextLocalTask(0)) != null; ) {
for (ForkJoinTask<?> t; (t = localPop()) != null; ) {
try {
t.cancel(false);
} catch (Throwable ignore) {
@ -1780,7 +1766,8 @@ public class ForkJoinPool extends AbstractExecutorService
* @param w caller's WorkQueue
*/
final void registerWorker(WorkQueue w) {
if (w != null && (runState & STOP) == 0L) {
if (w != null) {
w.array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
ThreadLocalRandom.localInit();
int seed = w.stackPred = ThreadLocalRandom.getProbe();
int phaseSeq = seed & ~((IDLE << 1) - 1); // initial phase tag
@ -1858,17 +1845,18 @@ public class ForkJoinPool extends AbstractExecutorService
}
if ((tryTerminate(false, false) & STOP) == 0L &&
phase != 0 && w != null && w.source != DROPPED) {
signalWork(); // possibly replace
w.cancelTasks(); // clean queue
signalWork(null, 0); // possibly replace
}
if (ex != null)
ForkJoinTask.rethrow(ex);
}
/**
* Releases an idle worker, or creates one if not enough exist.
* Releases an idle worker, or creates one if not enough exist,
* giving up if array a is non-null and the task at a[k] has already been taken.
*/
final void signalWork() {
final void signalWork(ForkJoinTask<?>[] a, int k) {
int pc = parallelism;
for (long c = ctl;;) {
WorkQueue[] qs = queues;
@ -1884,13 +1872,15 @@ public class ForkJoinPool extends AbstractExecutorService
if (sp == 0) {
if ((short)(c >>> TC_SHIFT) >= pc)
break;
nc = ((c + TC_UNIT) & TC_MASK);
nc = ((c + TC_UNIT) & TC_MASK) | ac;
}
else if ((v = w) == null)
break;
else
nc = (v.stackPred & LMASK) | (c & TC_MASK);
if (c == (c = compareAndExchangeCtl(c, nc | ac))) {
nc = (v.stackPred & LMASK) | (c & TC_MASK) | ac;
if (a != null && k < a.length && k >= 0 && a[k] == null)
break;
if (c == (c = ctl) && c == (c = compareAndExchangeCtl(c, nc))) {
if (v == null)
createWorker();
else {
@ -1973,178 +1963,196 @@ public class ForkJoinPool extends AbstractExecutorService
* @param w caller's WorkQueue (may be null on failed initialization)
*/
final void runWorker(WorkQueue w) {
if (w != null) {
int phase = w.phase, r = w.stackPred; // seed from registerWorker
int fifo = w.config & FIFO, nsteals = 0, src = -1;
for (;;) {
WorkQueue[] qs;
if (w != null && w.phase != 0) { // else unregistered
WorkQueue[] qs;
int r = w.stackPred; // seed from registerWorker
int fifo = (int)config & FIFO, rescans = 0, inactive = 0, taken = 0, n;
while ((runState & STOP) == 0L && (qs = queues) != null &&
(n = qs.length) > 0) {
int i = r, step = (r >>> 16) | 1;
r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
if ((runState & STOP) != 0L || (qs = queues) == null)
break;
int n = qs.length, i = r, step = (r >>> 16) | 1;
boolean rescan = false;
scan: for (int l = n; l > 0; --l, i += step) { // scan queues
int j, cap; WorkQueue q; ForkJoinTask<?>[] a;
if ((q = qs[j = i & (n - 1)]) != null &&
(a = q.array) != null && (cap = a.length) > 0) {
for (int m = cap - 1, pb = -1, b = q.base;;) {
ForkJoinTask<?> t; long k;
scan: for (int j = n; j != 0; --j, i += step) {
WorkQueue q; int qid;
if ((q = qs[qid = i & (n - 1)]) != null) {
ForkJoinTask<?>[] a; int cap; // poll queue
while ((a = q.array) != null && (cap = a.length) > 0) {
int b, nb, nk; long bp; ForkJoinTask<?> t;
t = (ForkJoinTask<?>)U.getReferenceAcquire(
a, k = slotOffset(m & b));
if (b != (b = q.base) || t == null ||
!U.compareAndSetReference(a, k, t, null)) {
if (a[b & m] == null) {
if (rescan) // end of run
break scan;
if (a[(b + 1) & m] == null &&
a[(b + 2) & m] == null) {
break; // probably empty
a, bp = slotOffset((cap - 1) & (b = q.base)));
long np = slotOffset(nk = (nb = b + 1) & (cap - 1));
if (q.base == b) { // else inconsistent
if (t == null) {
if (q.array == a) { // else resized
if (rescans > 0) // ran or stalled
break scan;
if (U.getReference(a, np) == null &&
(rescans >= 0 ||
(U.getReferenceAcquire(a, bp) == null &&
q.top == q.base)))
break;
rescans = 1; // may be stalled
}
if (pb == (pb = b)) { // track progress
rescan = true; // stalled; reorder scan
}
else if (inactive != 0) {
if ((inactive = tryReactivate(w)) != 0) {
rescans = 1; // can't take yet
break scan;
}
}
}
else {
boolean propagate;
int nb = q.base = b + 1, prevSrc = src;
w.nsteals = ++nsteals;
w.source = src = j; // volatile
rescan = true;
int nh = t.noUserHelp();
if (propagate =
(prevSrc != src || nh != 0) && a[nb & m] != null)
signalWork();
w.topLevelExec(t, fifo);
if ((b = q.base) != nb && !propagate)
break scan; // reduce interference
else if (U.compareAndSetReference(a, bp, t, null)) {
q.base = nb;
Object nt = U.getReferenceAcquire(a, np);
w.source = qid;
rescans = 1;
++taken;
if (nt != null && // confirm a[nk]
U.getReferenceAcquire(a, np) == nt)
signalWork(a, nk); // propagate
w.topLevelExec(t, fifo);
}
}
}
}
}
if (!rescan) {
if (((phase = deactivate(w, phase)) & IDLE) != 0)
break;
src = -1; // re-enable propagation
if (rescans >= 0)
--rescans;
else if (inactive == 0) {
if ((inactive = deactivate(w, taken)) != 0)
taken = 0;
}
else if (awaitWork(w) == 0)
inactive = rescans = 0;
else
break;
}
}
}
/**
* Deactivates and if necessary awaits signal or termination.
* Tries to deactivate the worker, keeping it active on contention.
*
* @param w the worker
* @param phase current phase
* @return current phase, with IDLE set if worker should exit
* @param w the work queue
* @param taken number of stolen tasks since last deactivation
* @return nonzero if inactive
*/
private int deactivate(WorkQueue w, int phase) {
if (w == null) // currently impossible
return IDLE;
int p = phase | IDLE, activePhase = phase + (IDLE << 1);
long pc = ctl, qc = (activePhase & LMASK) | ((pc - RC_UNIT) & UMASK);
int sp = w.stackPred = (int)pc; // set ctl stack link
w.phase = p;
if (!compareAndSetCtl(pc, qc)) // try to enqueue
return w.phase = phase; // back out on possible signal
int ac = (short)(qc >>> RC_SHIFT), n; long e; WorkQueue[] qs;
if (((e = runState) & STOP) != 0L ||
((e & SHUTDOWN) != 0L && ac == 0 && quiescent() > 0) ||
(qs = queues) == null || (n = qs.length) <= 0)
return IDLE; // terminating
for (int prechecks = Math.min(ac, 2), // reactivation threshold
k = Math.max(n + (n << 1), SPIN_WAITS << 1);;) {
WorkQueue q; int cap; ForkJoinTask<?>[] a; long c;
if (w.phase == activePhase)
return activePhase;
if (--k < 0)
return awaitWork(w, p); // block, drop, or exit
if ((q = qs[k & (n - 1)]) == null)
Thread.onSpinWait();
else if ((a = q.array) != null && (cap = a.length) > 0 &&
a[q.base & (cap - 1)] != null && --prechecks < 0 &&
(int)(c = ctl) == activePhase &&
compareAndSetCtl(c, (sp & LMASK) | ((c + RC_UNIT) & UMASK)))
return w.phase = activePhase; // reactivate
private int deactivate(WorkQueue w, int taken) {
int inactive = 0, phase;
if (w != null && (inactive = (phase = w.phase) & IDLE) == 0) {
long sp = (phase + (IDLE << 1)) & LMASK, pc, c;
w.phase = phase | IDLE;
w.stackPred = (int)(pc = ctl); // set ctl stack link
if (!compareAndSetCtl( // try to enqueue
pc, c = ((pc - RC_UNIT) & UMASK) | sp))
w.phase = phase; // back out on contention
else {
if (taken != 0) {
w.nsteals += taken;
if ((w.config & CLEAR_TLS) != 0 &&
(Thread.currentThread() instanceof ForkJoinWorkerThread f))
f.resetThreadLocals(); // (instanceof check always true)
}
if (((c & RC_MASK) == 0L && quiescent() > 0) || taken == 0)
inactive = w.phase & IDLE; // check quiescent termination
else { // spin for approx 1 scan cost
int tc = (short)(c >>> TC_SHIFT);
int spins = Math.max((tc << 1) + tc, SPIN_WAITS);
while ((inactive = w.phase & IDLE) != 0 && --spins != 0)
Thread.onSpinWait();
}
}
}
return inactive;
}
/**
* Reactivates worker w if it is currently the top of the ctl stack.
*
* @param w the work queue
* @return 0 if now active
*/
private int tryReactivate(WorkQueue w) {
int inactive = 0;
if (w != null) { // always true; hoist checks
int sp = w.stackPred, phase, activePhase; long c;
if ((inactive = (phase = w.phase) & IDLE) != 0 &&
(int)(c = ctl) == (activePhase = phase + IDLE) &&
compareAndSetCtl(c, (sp & LMASK) | ((c + RC_UNIT) & UMASK))) {
w.phase = activePhase;
inactive = 0;
}
}
return inactive;
}
/**
* Awaits signal or termination.
*
* @param w the work queue
* @param p current phase (known to be idle)
* @return current phase, with IDLE set if worker should exit
* @return 0 if now active
*/
private int awaitWork(WorkQueue w, int p) {
if (w != null) {
ForkJoinWorkerThread t; long deadline;
if ((w.config & CLEAR_TLS) != 0 && (t = w.owner) != null)
t.resetThreadLocals(); // clear before reactivate
if ((ctl & RC_MASK) > 0L)
deadline = 0L;
else if ((deadline =
(((w.source != INVALID_ID) ? keepAlive : TIMEOUT_SLOP)) +
System.currentTimeMillis()) == 0L)
deadline = 1L; // avoid zero
int activePhase = p + IDLE;
if ((p = w.phase) != activePhase && (runState & STOP) == 0L) {
private int awaitWork(WorkQueue w) {
int inactive = 0, phase;
if (w != null) { // always true; hoist checks
long waitTime = (w.source == INVALID_ID) ? 0L : keepAlive;
if ((inactive = (phase = w.phase) & IDLE) != 0) {
LockSupport.setCurrentBlocker(this);
w.parking = 1; // enable unpark
while ((p = w.phase) != activePhase) {
boolean trimmable = false; int trim;
Thread.interrupted(); // clear status
int activePhase = phase + IDLE;
for (long deadline = 0L;;) {
Thread.interrupted(); // clear status
if ((runState & STOP) != 0L)
break;
if (deadline != 0L) {
if ((trim = tryTrim(w, p, deadline)) > 0)
break;
else if (trim < 0)
deadline = 0L;
else
trimmable = true;
boolean trimmable = false; // use timed wait if trimmable
long d = 0L, c;
if (((c = ctl) & RC_MASK) == 0L && (int)c == activePhase) {
long now = System.currentTimeMillis();
if (deadline == 0L)
deadline = waitTime + now;
if (deadline - now <= TIMEOUT_SLOP) {
if (tryTrim(w, c, activePhase))
break;
continue; // lost race to trim
}
d = deadline;
trimmable = true;
}
U.park(trimmable, deadline);
w.parking = 1; // enable unpark and recheck
if ((inactive = w.phase & IDLE) != 0)
U.park(trimmable, d);
w.parking = 0; // close unpark window
if (inactive == 0 || (inactive = w.phase & IDLE) == 0)
break;
}
w.parking = 0;
LockSupport.setCurrentBlocker(null);
}
}
return p;
return inactive;
}
/**
* Tries to remove and deregister worker after timeout, and release
* another to do the same.
* @return > 0: trimmed, < 0 : not trimmable, else 0
* another to do the same unless new tasks are found.
*/
private int tryTrim(WorkQueue w, int phase, long deadline) {
long c, nc; int stat, activePhase, vp, i; WorkQueue[] vs; WorkQueue v;
if ((activePhase = phase + IDLE) != (int)(c = ctl) || w == null)
stat = -1; // no longer ctl top
else if (deadline - System.currentTimeMillis() >= TIMEOUT_SLOP)
stat = 0; // spurious wakeup
else if (!compareAndSetCtl(
c, nc = ((w.stackPred & LMASK) | (RC_MASK & c) |
(TC_MASK & (c - TC_UNIT)))))
stat = -1; // lost race to signaller
else {
stat = 1;
w.source = DROPPED;
w.phase = activePhase;
if ((vp = (int)nc) != 0 && (vs = queues) != null &&
vs.length > (i = vp & SMASK) && (v = vs[i]) != null &&
compareAndSetCtl( // try to wake up next waiter
nc, ((UMASK & (nc + RC_UNIT)) |
(nc & TC_MASK) | (v.stackPred & LMASK)))) {
v.source = INVALID_ID; // enable cascaded timeouts
v.phase = vp;
U.unpark(v.owner);
private boolean tryTrim(WorkQueue w, long c, int activePhase) {
if (w != null) {
int vp, i; WorkQueue[] vs; WorkQueue v;
long nc = ((w.stackPred & LMASK) |
((RC_MASK & c) | (TC_MASK & (c - TC_UNIT))));
if (compareAndSetCtl(c, nc)) {
w.source = DROPPED;
w.phase = activePhase;
if ((vp = (int)nc) != 0 && (vs = queues) != null &&
vs.length > (i = vp & SMASK) && (v = vs[i]) != null &&
compareAndSetCtl( // try to wake up next waiter
nc, ((v.stackPred & LMASK) |
((UMASK & (nc + RC_UNIT)) | (nc & TC_MASK))))) {
v.source = INVALID_ID; // enable cascaded timeouts
v.phase = vp;
U.unpark(v.owner);
}
return true;
}
}
return stat;
return false;
}
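The keep-alive that drives this timed trimming is configurable through the long-form public constructor; a hedged sketch with illustrative sizing values:

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;

public class KeepAliveDemo {
    public static void main(String[] args) {
        ForkJoinPool pool = new ForkJoinPool(
                8,                                                 // parallelism
                ForkJoinPool.defaultForkJoinWorkerThreadFactory,
                null,                                              // no uncaught-exception handler
                false,                                             // LIFO local queues
                8, 256, 1,                                         // core, max, minimum runnable
                null,                                              // default saturation policy
                30, TimeUnit.SECONDS);                             // keepAliveTime used by timed trims
        pool.shutdown();
    }
}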
/**
@ -2561,52 +2569,35 @@ public class ForkJoinPool extends AbstractExecutorService
/**
* Finds and locks a WorkQueue for an external submitter, or
* throws RejectedExecutionException if shutdown or terminating.
* @param r current ThreadLocalRandom.getProbe() value
* throws RejectedExecutionException if shutdown
* @param rejectOnShutdown true if RejectedExecutionException
* should be thrown when shutdown (else only if terminating)
* should be thrown when shutdown
*/
private WorkQueue submissionQueue(int r, boolean rejectOnShutdown) {
int reuse; // nonzero if prefer create
if ((reuse = r) == 0) {
ThreadLocalRandom.localInit(); // initialize caller's probe
final WorkQueue externalSubmissionQueue(boolean rejectOnShutdown) {
int r;
if ((r = ThreadLocalRandom.getProbe()) == 0) {
ThreadLocalRandom.localInit(); // initialize caller's probe
r = ThreadLocalRandom.getProbe();
}
for (int probes = 0; ; ++probes) {
int n, i, id; WorkQueue[] qs; WorkQueue q;
if ((qs = queues) == null)
break;
if ((n = qs.length) <= 0)
for (;;) {
WorkQueue q; WorkQueue[] qs; int n, id, i;
if ((qs = queues) == null || (n = qs.length) <= 0)
break;
if ((q = qs[i = (id = r & EXTERNAL_ID_MASK) & (n - 1)]) == null) {
WorkQueue w = new WorkQueue(null, id, 0, false);
w.phase = id;
boolean reject = ((lockRunState() & SHUTDOWN) != 0 &&
rejectOnShutdown);
if (!reject && queues == qs && qs[i] == null)
q = qs[i] = w; // else lost race to install
WorkQueue newq = new WorkQueue(null, id, 0, false);
lockRunState();
if (qs[i] == null && queues == qs)
q = qs[i] = newq; // else lost race to install
unlockRunState();
if (q != null)
return q;
if (reject)
}
if (q != null && q.tryLockPhase()) {
if (rejectOnShutdown && (runState & SHUTDOWN) != 0L) {
q.unlockPhase(); // check while q lock held
break;
reuse = 0;
}
if (reuse == 0 || !q.tryLockPhase()) { // move index
if (reuse == 0) {
if (probes >= n >> 1)
reuse = r; // stop prefering free slot
}
else if (q != null)
reuse = 0; // probe on collision
r = ThreadLocalRandom.advanceProbe(r);
}
else if (rejectOnShutdown && (runState & SHUTDOWN) != 0L) {
q.unlockPhase(); // check while q lock held
break;
}
else
return q;
}
r = ThreadLocalRandom.advanceProbe(r); // move
}
throw new RejectedExecutionException();
}
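External submission and the shutdown rejection checked above can be exercised through the public API; a small, illustrative sketch:

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;

public class ExternalSubmitDemo {
    public static void main(String[] args) throws InterruptedException {
        ForkJoinPool pool = new ForkJoinPool(2);
        pool.submit(() -> System.out.println("submitted from a non-worker thread"));
        pool.shutdown();                          // previously submitted tasks still run
        try {
            pool.submit(() -> System.out.println("never runs"));
        } catch (RejectedExecutionException expected) {
            System.out.println("rejected after shutdown");
        }
        pool.awaitTermination(10, TimeUnit.SECONDS);
    }
}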
@ -2620,24 +2611,12 @@ public class ForkJoinPool extends AbstractExecutorService
}
else { // find and lock queue
internal = false;
q = submissionQueue(ThreadLocalRandom.getProbe(), true);
q = externalSubmissionQueue(true);
}
q.push(task, signalIfEmpty ? this : null, internal);
return task;
}
/**
* Returns queue for an external submission, bypassing call to
* submissionQueue if already established and unlocked.
*/
final WorkQueue externalSubmissionQueue(boolean rejectOnShutdown) {
WorkQueue[] qs; WorkQueue q; int n;
int r = ThreadLocalRandom.getProbe();
return (((qs = queues) != null && (n = qs.length) > 0 &&
(q = qs[r & EXTERNAL_ID_MASK & (n - 1)]) != null && r != 0 &&
q.tryLockPhase()) ? q : submissionQueue(r, rejectOnShutdown));
}
/**
* Returns queue for an external thread, if one exists that has
* possibly ever submitted to the given pool (nonzero probe), or
@ -3310,11 +3289,14 @@ public class ForkJoinPool extends AbstractExecutorService
* @since 19
*/
public int setParallelism(int size) {
int prevSize;
if (size < 1 || size > MAX_CAP)
throw new IllegalArgumentException();
if ((config & PRESET_SIZE) != 0)
throw new UnsupportedOperationException("Cannot override System property");
return getAndSetParallelism(size);
if ((prevSize = getAndSetParallelism(size)) < size)
signalWork(null, 0); // trigger worker activation
return prevSize;
}
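A minimal usage sketch of setParallelism with the new activation signal on growth (sizes are arbitrary):

import java.util.concurrent.ForkJoinPool;

public class ResizeDemo {
    public static void main(String[] args) {
        ForkJoinPool pool = new ForkJoinPool(2);
        int previous = pool.setParallelism(6);   // growing the target now also signals worker activation
        System.out.println(previous + " -> " + pool.getParallelism());
        pool.shutdown();
    }
}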
/**

View File

@ -588,13 +588,15 @@ public class LinkedTransferQueue<E> extends AbstractQueue<E>
do {
m = p.item;
q = p.next;
if (p.isData != haveData && haveData != (m != null) &&
p.cmpExItem(m, e) == m) {
Thread w = p.waiter; // matched complementary node
if (p != h && h == cmpExHead(h, (q == null) ? p : q))
h.next = h; // advance head; self-link old
LockSupport.unpark(w);
return m;
if (p.isData != haveData && haveData != (m != null)) {
if (p.cmpExItem(m, e) == m) {
Thread w = p.waiter; // matched complementary node
if (p != h && h == cmpExHead(h, (q == null) ? p : q))
h.next = h; // advance head; self-link old
LockSupport.unpark(w);
return m;
}
continue restart;
} else if (q == null) {
if (ns == 0L) // try to append unless immediate
break restart;
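The matching of complementary (data vs. request) nodes that this retry change touches can be exercised with the public transfer API; a minimal sketch:

import java.util.concurrent.LinkedTransferQueue;

public class TransferDemo {
    public static void main(String[] args) throws InterruptedException {
        LinkedTransferQueue<String> q = new LinkedTransferQueue<>();
        Thread consumer = new Thread(() -> {
            try {
                System.out.println("took " + q.take());  // waits as a complementary (request) node
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
        consumer.start();
        q.transfer("hello");         // returns only once matched with the waiting taker
        consumer.join();
    }
}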

Some files were not shown because too many files have changed in this diff