mirror of https://github.com/openjdk/jdk.git (synced 2026-01-28 12:09:14 +00:00)
commit 30fa1f0380: Merge branch 'openjdk:master' into JDK-8370196
@@ -50,7 +50,14 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
    # add -z,relro (mark relocations read only) for all libs
    # add -z,now ("full relro" - more of the Global Offset Table GOT is marked read only)
    # add --no-as-needed to disable default --as-needed link flag on some GCC toolchains
    # add --icf=all (Identical Code Folding: merges identical functions)
    BASIC_LDFLAGS="-Wl,-z,defs -Wl,-z,relro -Wl,-z,now -Wl,--no-as-needed -Wl,--exclude-libs,ALL"
    if test "x$LINKER_TYPE" = "xgold"; then
      if test x$DEBUG_LEVEL = xrelease; then
        BASIC_LDFLAGS="$BASIC_LDFLAGS -Wl,--icf=all"
      fi
    fi

    # Linux : remove unused code+data in link step
    if test "x$ENABLE_LINKTIME_GC" = xtrue; then
      if test "x$OPENJDK_TARGET_CPU" = xs390x; then
@@ -516,6 +516,7 @@ AC_DEFUN([TOOLCHAIN_EXTRACT_LD_VERSION],
    if [ [[ "$LINKER_VERSION_STRING" == *gold* ]] ]; then
      [ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
          $SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*) .*/\1/'` ]
      LINKER_TYPE=gold
    else
      [ LINKER_VERSION_NUMBER=`$ECHO $LINKER_VERSION_STRING | \
          $SED -e 's/.* \([0-9][0-9]*\(\.[0-9][0-9]*\)*\).*/\1/'` ]
@@ -170,6 +170,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
  ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
    AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
        $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_vector.ad \
        $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_atomic.ad \
      )))
  endif
@@ -237,7 +237,7 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
     DISABLED_WARNINGS_gcc_dgif_lib.c := sign-compare, \
     DISABLED_WARNINGS_gcc_jcmaster.c := implicit-fallthrough, \
     DISABLED_WARNINGS_gcc_jdphuff.c := shift-negative-value, \
-    DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized unused-function, \
+    DISABLED_WARNINGS_gcc_png.c := maybe-uninitialized, \
     DISABLED_WARNINGS_gcc_pngerror.c := maybe-uninitialized, \
     DISABLED_WARNINGS_gcc_splashscreen_gfx_impl.c := implicit-fallthrough \
         maybe-uninitialized, \
@@ -248,7 +248,6 @@ ifeq ($(ENABLE_HEADLESS_ONLY), false)
     DISABLED_WARNINGS_clang := deprecated-non-prototype, \
     DISABLED_WARNINGS_clang_dgif_lib.c := sign-compare, \
     DISABLED_WARNINGS_clang_gzwrite.c := format-nonliteral, \
-    DISABLED_WARNINGS_clang_png.c := unused-function, \
     DISABLED_WARNINGS_clang_splashscreen_impl.c := sign-compare \
         unused-but-set-variable unused-function, \
     DISABLED_WARNINGS_clang_splashscreen_png.c := \
File diff suppressed because it is too large.

src/hotspot/cpu/aarch64/aarch64_atomic.ad (new file, 909 lines)
@@ -0,0 +1,909 @@
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// BEGIN This file is automatically generated. Do not edit --------------

// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// STLXR.

// This section is generated from aarch64_atomic_ad.m4
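
The comment above is the central memory-model point of this file. As a rough
portable analogue, a minimal C++ std::atomic sketch (illustrative only, not
HotSpot code; the LDXR/STLXR lowering mentioned above is what AArch64 emits
for a release-ordered store-exclusive):

#include <atomic>

// Sketch: "release is always true" is the same conservatism as never
// passing anything weaker than release for the success ordering here,
// even when the caller only asked for acquire. On AArch64 the release
// half of the CAS becomes STLXR, as the comment above says.
bool cas_always_release(std::atomic<long>& cell, long& expected, long desired) {
  return cell.compare_exchange_strong(
      expected, desired,
      std::memory_order_acq_rel,   // success: acquire + release, never weaker
      std::memory_order_acquire);  // failure: plain acquire load
}
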
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
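
The six patterns above always release but never acquire; the *Acq forms that
follow are selected when needs_acquiring_load_exclusive(n) holds, i.e. when
C2 decides the CAS also needs acquire semantics on the load-exclusive side.
A hedged C++ sketch of the two flavours (my names, not HotSpot API; it also
shows that CompareAndExchange* yields the old value as its result):

#include <atomic>

// Sketch: plain patterns ~ release-only CAS; *Acq patterns ~ acquire+release.
// Which one fires is driven by the predicate, not by the caller.
long cae_plain(std::atomic<long>& cell, long expected, long desired) {
  cell.compare_exchange_strong(expected, desired,
                               std::memory_order_release,
                               std::memory_order_relaxed);
  return expected;  // like CompareAndExchange*: the old value is the result
}

long cae_acq(std::atomic<long>& cell, long expected, long desired) {
  cell.compare_exchange_strong(expected, desired,
                               std::memory_order_acq_rel,
                               std::memory_order_acquire);
  return expected;
}
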
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
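
Where CompareAndExchange* produces the old value (hence TEMP_DEF res and the
sign-extends), the CompareAndSwap* patterns below only need a success flag:
they pass noreg as the result register and derive the flag from the condition
codes with csetw. A minimal C++ sketch of the same shape (illustrative only):

#include <atomic>

// Sketch: CompareAndSwap* corresponds to the bool result of
// compare_exchange_strong; the old value itself is discarded (noreg),
// only success/failure survives as 1/0.
bool cas_flag(std::atomic<int>& cell, int expected, int desired) {
  return cell.compare_exchange_strong(expected, desired,
                                      std::memory_order_release,
                                      std::memory_order_relaxed);
}
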
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
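
The Weak* patterns below set /*weak*/ true: a weak CAS may fail spuriously
(e.g. the exclusive reservation is lost) even when the values matched, so
callers are expected to retry. A minimal C++ sketch of why that contract
still works (illustrative, not HotSpot code):

#include <atomic>

// Sketch: weak CAS is allowed to fail even when *cell == expected, which
// is exactly the compare_exchange_weak contract; the caller supplies the
// retry loop, so spurious failures are harmless.
void add_one(std::atomic<long>& cell) {
  long old = cell.load(std::memory_order_relaxed);
  while (!cell.compare_exchange_weak(old, old + 1,
                                     std::memory_order_release,
                                     std::memory_order_relaxed)) {
    // retry; each failure refreshes 'old' with the current value
  }
}
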
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_weak $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_weak $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_weak $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_weak $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_weak $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2*VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_weak $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq_weak $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq_weak $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq_weak $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq_weak $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq_weak $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq_weak $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
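
GetAndSet* is a plain atomic exchange: the macroassembler atomic_xchg /
atomic_xchgw helpers return the previous value, and the *Acq forms use the
acquiring atomic_xchgal / atomic_xchgalw variants. A minimal C++ sketch of
the operation (illustrative only):

#include <atomic>

// Sketch: GetAndSet corresponds to std::atomic::exchange; 'oldval'
// receives the prior contents of the cell.
long swap_in(std::atomic<long>& cell, long newval) {
  return cell.exchange(newval, std::memory_order_acq_rel);
}
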
instruct getAndSetI(indirect mem, iRegI newval, iRegINoSp oldval) %{
  match(Set oldval (GetAndSetI mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchgw $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetL(indirect mem, iRegL newval, iRegLNoSp oldval) %{
  match(Set oldval (GetAndSetL mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchg $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetN(indirect mem, iRegN newval, iRegNNoSp oldval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (GetAndSetN mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchgw $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetP(indirect mem, iRegP newval, iRegPNoSp oldval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (GetAndSetP mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchg $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetIAcq(indirect mem, iRegI newval, iRegINoSp oldval) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set oldval (GetAndSetI mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetLAcq(indirect mem, iRegL newval, iRegLNoSp oldval) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set oldval (GetAndSetL mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetNAcq(indirect mem, iRegN newval, iRegNNoSp oldval) %{
  predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (GetAndSetN mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp oldval) %{
  predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));
  match(Set oldval (GetAndSetP mem newval));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
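
GetAndAdd* is fetch-and-add. The NoRes variants fire when result_not_used():
the old value goes to noreg, which (my reading; this diff does not say so
explicitly) lets the backend drop the dead result, and the Const variants
fold an add/sub immediate increment. A minimal C++ sketch of both shapes
(illustrative only):

#include <atomic>

// Sketch: the normal pattern returns the old value; the NoRes pattern is
// the same operation with the fetched value discarded.
long bump(std::atomic<long>& counter, long incr) {
  return counter.fetch_add(incr, std::memory_order_acq_rel);
}

void bump_no_result(std::atomic<long>& counter, long incr) {
  (void)counter.fetch_add(incr, std::memory_order_release);  // old value dead
}
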
instruct getAndAddI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2*VOLATILE_REF_COST+1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST+1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddINoRes(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "get_and_addI noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddIAcqNoRes(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddIConst(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2*VOLATILE_REF_COST+1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddIAcqConst(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST+1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddINoResConst(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "get_and_addI noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddIAcqNoResConst(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2*VOLATILE_REF_COST+1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST+1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLNoRes(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "get_and_addL noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLAcqNoRes(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLConst(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2*VOLATILE_REF_COST+1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLAcqConst(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST+1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLNoResConst(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2*VOLATILE_REF_COST);
  format %{ "get_and_addL noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct getAndAddLAcqNoResConst(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq noreg, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

src/hotspot/cpu/aarch64/aarch64_atomic_ad.m4 (new file, 246 lines)
@@ -0,0 +1,246 @@
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// BEGIN This file is automatically generated. Do not edit --------------

// Sundry CAS operations. Note that release is always true,
// regardless of the memory ordering of the CAS. This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// STLXR.

// This section is generated from aarch64_atomic_ad.m4

dnl Return Arg1 with two spaces before it. We need this because m4
dnl strips leading spaces from macro args.
define(`INDENT', `  $1')dnl
dnl
dnl
dnl
dnl ====================== CompareAndExchange*
dnl
define(`CAE_INSN1',
`
instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($7,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
  ins_cost(`'ifelse($7,Acq,,2*)VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg$5`'ifelse($7,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($7,Acq,true,false), /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ $6($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
define(`CAE_INSN2',
`
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
       $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
       $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
  ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
CAE_INSN1(B, I, byte, byte, b, sxtbw, )
CAE_INSN1(S, I, short, halfword, s, sxthw, )
CAE_INSN2(I, I, int, word, w, , )
CAE_INSN2(L, L, long, xword, , , )
CAE_INSN2(N, N, narrow oop, word, w, , )
CAE_INSN2(P, P, ptr, xword, , , )
dnl
CAE_INSN1(B, I, byte, byte, b, sxtbw, Acq)
CAE_INSN1(S, I, short, halfword, s, sxthw, Acq)
CAE_INSN2(I, I, int, word, w, Acq)
CAE_INSN2(L, L, long, xword, , Acq)
CAE_INSN2(N, N, narrow oop, word, w, Acq)
CAE_INSN2(P, P, ptr, xword, , Acq)
dnl
dnl
dnl
dnl ====================== (Weak)CompareAndSwap*
dnl
define(`CAS_INSN1',
`
instruct ifelse($7,Weak,'weakCompare`,'compare`)AndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
  match(Set res ($7CompareAndSwap$1 mem (Binary oldval newval)));
  ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,)`'ifelse($7,Weak,_weak) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ ifelse($7,Weak,true,false), noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
define(`CAS_INSN2',
`
instruct ifelse($7,Weak,'weakCompare`,'compare`)AndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
       $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
       $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set res ($7CompareAndSwap$1 mem (Binary oldval newval)));
  ins_cost(`'ifelse($6,Acq,,2*)VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,)`'ifelse($7,Weak,_weak) $res = $mem, $oldval, $newval\t# ($3) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ ifelse($7,Weak,true,false), noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
CAS_INSN1(B, I, byte, byte, b, , )
CAS_INSN1(S, I, short, halfword, s, , )
CAS_INSN2(I, I, int, word, w, , )
CAS_INSN2(L, L, long, xword, , , )
CAS_INSN2(N, N, narrow oop, word, w, , )
CAS_INSN2(P, P, ptr, xword, , , )
dnl
CAS_INSN1(B, I, byte, byte, b, Acq, )
CAS_INSN1(S, I, short, halfword, s, Acq, )
CAS_INSN2(I, I, int, word, w, Acq, )
CAS_INSN2(L, L, long, xword, , Acq, )
CAS_INSN2(N, N, narrow oop, word, w, Acq, )
CAS_INSN2(P, P, ptr, xword, , Acq, )
dnl
CAS_INSN1(B, I, byte, byte, b, , Weak)
CAS_INSN1(S, I, short, halfword, s, , Weak)
CAS_INSN2(I, I, int, word, w, , Weak)
CAS_INSN2(L, L, long, xword, , , Weak)
CAS_INSN2(N, N, narrow oop, word, w, , Weak)
CAS_INSN2(P, P, ptr, xword, , , Weak)
dnl
CAS_INSN1(B, I, byte, byte, b, Acq, Weak)
CAS_INSN1(S, I, short, halfword, s, Acq, Weak)
CAS_INSN2(I, I, int, word, w, Acq, Weak)
CAS_INSN2(L, L, long, xword, , Acq, Weak)
CAS_INSN2(N, N, narrow oop, word, w, Acq, Weak)
CAS_INSN2(P, P, ptr, xword, , Acq, Weak)
dnl
dnl
dnl
dnl ====================== GetAndSet*
dnl
define(`GAS_INSN1',
`
instruct getAndSet$1$3(indirect mem, iReg$1 newval, iReg$1NoSp oldval) %{
ifelse($1$3,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
       $1$3,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
       $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $3,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set oldval (GetAndSet$1 mem newval));
  ins_cost(`'ifelse($4,Acq,,2*)VOLATILE_REF_COST);
  format %{ "atomic_xchg$2`'ifelse($3,Acq,_acq) $oldval, $newval, [$mem]" %}
  ins_encode %{
    __ atomic_xchg`'ifelse($3,Acq,al)$2($oldval$$Register, $newval$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}')dnl
dnl
GAS_INSN1(I, w, )
GAS_INSN1(L, , )
GAS_INSN1(N, w, )
GAS_INSN1(P, , )
dnl
GAS_INSN1(I, w, Acq)
GAS_INSN1(L, , Acq)
GAS_INSN1(N, w, Acq)
GAS_INSN1(P, , Acq)
dnl
dnl
dnl
dnl ====================== GetAndAdd*
dnl
define(`GAA_INSN1',
`
instruct getAndAdd$1$4$5$6(indirect mem, `'ifelse($5,NoRes,Universe dummy,iReg$1NoSp newval), `'ifelse($6,Const,imm$1AddSub incr,iReg$2 incr)) %{
ifelse($4$5,AcqNoRes,INDENT(predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));),
       $5,NoRes,INDENT(predicate(n->as_LoadStore()->result_not_used());),
       $4,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set ifelse($5,NoRes,dummy,newval) (GetAndAdd$1 mem incr));
  ins_cost(`'ifelse($4,Acq,,2*)VOLATILE_REF_COST`'ifelse($5,NoRes,,+1));
  format %{ "get_and_add$1`'ifelse($4,Acq,_acq) `'ifelse($5,NoRes,noreg,$newval), [$mem], $incr" %}
  ins_encode %{
    __ atomic_add`'ifelse($4,Acq,al)$3(`'ifelse($5,NoRes,noreg,$newval$$Register), `'ifelse($6,Const,$incr$$constant,$incr$$Register), as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}')dnl
dnl
dnl
GAA_INSN1(I, IorL2I, w, , , )
GAA_INSN1(I, IorL2I, w, Acq, , )
GAA_INSN1(I, IorL2I, w, , NoRes, )
GAA_INSN1(I, IorL2I, w, Acq, NoRes, )
GAA_INSN1(I, I, w, , , Const)
GAA_INSN1(I, I, w, Acq, , Const)
GAA_INSN1(I, I, w, , NoRes, Const)
GAA_INSN1(I, I, w, Acq, NoRes, Const)
dnl
GAA_INSN1(L, L, , , , )
GAA_INSN1(L, L, , Acq, , )
GAA_INSN1(L, L, , , NoRes, )
GAA_INSN1(L, L, , Acq, NoRes, )
GAA_INSN1(L, L, , , , Const)
GAA_INSN1(L, L, , Acq, , Const)
GAA_INSN1(L, L, , , NoRes, Const)
GAA_INSN1(L, L, , Acq, NoRes, Const)
dnl
@ -1,161 +0,0 @@
|
||||
dnl Copyright (c) 2016, 2021, Red Hat Inc. All rights reserved.
|
||||
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
dnl
|
||||
dnl This code is free software; you can redistribute it and/or modify it
|
||||
dnl under the terms of the GNU General Public License version 2 only, as
|
||||
dnl published by the Free Software Foundation.
|
||||
dnl
|
||||
dnl This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
dnl version 2 for more details (a copy is included in the LICENSE file that
|
||||
dnl accompanied this code).
|
||||
dnl
|
||||
dnl You should have received a copy of the GNU General Public License version
|
||||
dnl 2 along with this work; if not, write to the Free Software Foundation,
|
||||
dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
dnl
|
||||
dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
dnl or visit www.oracle.com if you need additional information or have any
|
||||
dnl questions.
|
||||
dnl
|
||||
dnl
|
||||
dnl Process this file with m4 cas.m4 to generate the CAE and wCAS
|
||||
dnl instructions used in aarch64.ad.
|
||||
dnl
|
||||
|
||||
// BEGIN This section of the file is automatically generated. Do not edit --------------
|
||||
|
||||
// Sundry CAS operations. Note that release is always true,
|
||||
// regardless of the memory ordering of the CAS. This is because we
|
||||
// need the volatile case to be sequentially consistent but there is
|
||||
// no trailing StoreLoad barrier emitted by C2. Unfortunately we
|
||||
// can't check the type of memory ordering here, so we always emit a
|
||||
// STLXR.
|
||||
|
||||
// This section is generated from cas.m4
|
||||
|
||||
dnl Return Arg1 with two spaces before it. We need this because m4
dnl strips leading spaces from macro args.
define(`INDENT', `  $1')dnl
dnl
define(`CAS_INSN',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
       $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
       $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
define(`CAS_INSN4',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($7,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
ifelse($7,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg$5`'ifelse($7,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($7,Acq,true,false), /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ $6($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
CAS_INSN4(B,I,byte,byte,b,sxtbw)
CAS_INSN4(S,I,short,halfword,s,sxthw)
CAS_INSN(I,I,int,word,w)
CAS_INSN(L,L,long,xword)
CAS_INSN(N,N,narrow oop,word,w)
CAS_INSN(P,P,ptr,xword)
dnl
CAS_INSN4(B,I,byte,byte,b,sxtbw,Acq)
CAS_INSN4(S,I,short,halfword,s,sxthw,Acq)
CAS_INSN(I,I,int,word,w,Acq)
CAS_INSN(L,L,long,xword,,Acq)
CAS_INSN(N,N,narrow oop,word,w,Acq)
CAS_INSN(P,P,ptr,xword,,Acq)
dnl
define(`CAS_INSN2',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),`dnl')
  match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
  effect(KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}')dnl
define(`CAS_INSN3',
`
// This pattern is generated automatically from cas.m4.
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));),
       $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);),
       $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);),
       $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));),
       `dnl')
  match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
ifelse($6,Acq,'ins_cost(VOLATILE_REF_COST);`,'ins_cost(2 * VOLATILE_REF_COST);`)
  effect(KILL cr);
  format %{
    "cmpxchg$5`'ifelse($6,Acq,_acq,) $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}')dnl
CAS_INSN2(B,I,byte,byte,b)
CAS_INSN2(S,I,short,halfword,s)
CAS_INSN3(I,I,int,word,w)
CAS_INSN3(L,L,long,xword)
CAS_INSN3(N,N,narrow oop,word,w)
CAS_INSN3(P,P,ptr,xword)
CAS_INSN2(B,I,byte,byte,b,Acq)
CAS_INSN2(S,I,short,halfword,s,Acq)
CAS_INSN3(I,I,int,word,w,Acq)
CAS_INSN3(L,L,long,xword,,Acq)
CAS_INSN3(N,N,narrow oop,word,w,Acq)
CAS_INSN3(P,P,ptr,xword,,Acq)
dnl

// END This section of the file is automatically generated. Do not edit --------------
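The weak variants generated above (CAS_INSN2/CAS_INSN3) do not hand back the old value; they materialize a success bit with csetw. A portable sketch of the same contract (illustrative, not HotSpot API):

  #include <atomic>

  // Weak CAS: may fail spuriously even when *a == oldval; the caller
  // gets only the 1/0 success flag, like the csetw result.
  bool weak_cas(std::atomic<int>& a, int oldval, int newval) {
    return a.compare_exchange_weak(oldval, newval, std::memory_order_seq_cst);
  }
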
@@ -1063,6 +1063,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
  return false;
}

bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;

@@ -157,6 +157,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
  return result;
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,
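The one-byte PlatformXchg specialization added here delegates to XchgUsingCmpxchg, i.e. exchange is synthesized from a CAS retry loop. The general shape of that technique, in portable C++ (names are illustrative):

  #include <atomic>

  // Exchange built on compare-and-swap: retry until our value is
  // installed, then return whatever was there before.
  template <typename T>
  T xchg_via_cas(std::atomic<T>& a, T desired) {
    T expected = a.load();
    while (!a.compare_exchange_weak(expected, desired)) {
      // on failure 'expected' was refreshed with the observed value; retry
    }
    return expected;
  }
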
@@ -2383,6 +2383,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
  return false;
}

bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;

@@ -2053,6 +2053,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
  return false;
}

bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;

@@ -1865,6 +1865,10 @@ bool Matcher::is_reg2reg_move(MachNode* m) {
  return false;
}

bool Matcher::is_register_biasing_candidate(const MachNode* mdef, int oper_index) {
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;

@@ -89,10 +89,10 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  Label done;
  Label L_done;

  __ testptr(count, count);
  __ jcc(Assembler::zero, done);
  __ jccb(Assembler::zero, L_done);

  // Calculate end address in "count".
  Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
@@ -111,31 +111,31 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas
  __ shrptr(count, CardTable::card_shift());
  __ addptr(count, tmp);

  Label loop;
  Label L_loop;
  // Iterate from start card to end card (inclusive).
  __ bind(loop);
  __ bind(L_loop);

  Label is_clean_card;
  Label L_is_clean_card;
  if (UseCondCardMark) {
    __ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
    __ jcc(Assembler::equal, is_clean_card);
    __ jccb(Assembler::equal, L_is_clean_card);
  } else {
    __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
  }

  Label next_card;
  __ bind(next_card);
  Label L_next_card;
  __ bind(L_next_card);
  __ addptr(addr, sizeof(CardTable::CardValue));
  __ cmpptr(addr, count);
  __ jcc(Assembler::belowEqual, loop);
  __ jmp(done);
  __ jccb(Assembler::belowEqual, L_loop);
  __ jmpb(L_done);

  __ bind(is_clean_card);
  // Card was clean. Dirty card and go to next..
  __ bind(L_is_clean_card);
  // Card was clean. Dirty card and go to next.
  __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
  __ jmp(next_card);
  __ jmpb(L_next_card);

  __ bind(done);
  __ bind(L_done);
}

void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -157,22 +157,6 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator
  }
}

static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
                                     const Register thread, const Register value, const Register temp) {
  // This code assumes that buffer index is pointer sized.
  STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));
  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  __ movptr(temp, Address(thread, in_bytes(index_offset)));   // temp := *(index address)
  __ testptr(temp, temp);                                     // index == 0?
  __ jcc(Assembler::zero, runtime);                           // jump to runtime if index == 0 (full buffer)
  // The buffer is not full, store value into it.
  __ subptr(temp, wordSize);                                  // temp := next index
  __ movptr(Address(thread, in_bytes(index_offset)), temp);   // *(index address) := next index
  __ addptr(temp, Address(thread, in_bytes(buffer_offset)));  // temp := buffer address + next index
  __ movptr(Address(temp, 0), value);                         // *(buffer address + next index) := value
}

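The buffer protocol this deleted helper implemented (and which the slow path below now inlines) is a bump-down index into a thread-local array, with index 0 meaning "full". In plain C++ (field names are illustrative, not HotSpot's):

  #include <cstddef>
  #include <cstdint>

  // Illustrative model of a SATB-style buffer: 'index' is a byte offset
  // that counts down toward 0; 0 means "full, hand off to the runtime".
  struct QueueModel {
    size_t    index;   // byte offset of the next free slot, counting down
    intptr_t* buffer;  // base address of the buffer
  };

  // Returns false when the buffer is full (caller takes the slow path).
  bool try_enqueue(QueueModel& q, intptr_t value) {
    if (q.index == 0) return false;                     // full -> runtime
    q.index -= sizeof(intptr_t);                        // next index
    *(intptr_t*)((char*)q.buffer + q.index) = value;    // buffer + index := value
    return true;
  }
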
static void generate_pre_barrier_fast_path(MacroAssembler* masm,
                                           const Register thread) {
  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
@@ -190,21 +174,40 @@ static void generate_pre_barrier_slow_path(MacroAssembler* masm,
                                           const Register pre_val,
                                           const Register thread,
                                           const Register tmp,
                                           Label& done,
                                           Label& runtime) {
                                           Label& L_done) {
  Address index_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer_addr(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // This code assumes that buffer index is pointer sized.
  STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t));

  Label L_runtime;

  // Do we need to load the previous value?
  if (obj != noreg) {
    __ load_heap_oop(pre_val, Address(obj, 0), noreg, AS_RAW);
  }

  // Is the previous value null?
  __ cmpptr(pre_val, NULL_WORD);
  __ jcc(Assembler::equal, done);
  generate_queue_insertion(masm,
                           G1ThreadLocalData::satb_mark_queue_index_offset(),
                           G1ThreadLocalData::satb_mark_queue_buffer_offset(),
                           runtime,
                           thread, pre_val, tmp);
  __ jmp(done);
  __ testptr(pre_val, pre_val);
  __ jcc(Assembler::equal, L_done);

  // Can we store a value in the given thread's buffer?
  // (The index field is typed as size_t.)
  __ movptr(tmp, index_addr);           // temp := *(index address)
  __ testptr(tmp, tmp);                 // index == 0?
  __ jccb(Assembler::zero, L_runtime);  // jump to runtime if index == 0 (full buffer)

  // The buffer is not full, store value into it.
  __ subptr(tmp, wordSize);             // temp := next index
  __ movptr(index_addr, tmp);           // *(index address) := next index
  __ addptr(tmp, buffer_addr);          // temp := buffer address + next index
  __ movptr(Address(tmp, 0), pre_val);  // *(buffer address + next index) := value

  // Jump out if done, or fall-through to runtime.
  // "L_done" is far away, so jump cannot be short.
  __ jmp(L_done);
  __ bind(L_runtime);
}

void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
@@ -219,7 +222,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
  const Register thread = r15_thread;

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

@@ -231,9 +233,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
  generate_pre_barrier_fast_path(masm, thread);
  // If marking is not active (*(mark queue active address) == 0), jump to done
  __ jcc(Assembler::equal, done);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime);

  __ bind(runtime);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done);

  // Determine and save the live input values
  __ push_call_clobbered_registers();
@@ -272,23 +272,23 @@ static void generate_post_barrier(MacroAssembler* masm,
                                  const Register store_addr,
                                  const Register new_val,
                                  const Register tmp1,
                                  Label& done,
                                  bool new_val_may_be_null) {

  assert_different_registers(store_addr, new_val, tmp1, noreg);

  Register thread = r15_thread;

  Label L_done;
  // Does store cross heap regions?
  __ movptr(tmp1, store_addr);                       // tmp1 := store address
  __ xorptr(tmp1, new_val);                          // tmp1 := store address ^ new value
  __ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes);  // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
  __ jcc(Assembler::equal, done);
  __ jccb(Assembler::equal, L_done);

  // Crosses regions, storing null?
  if (new_val_may_be_null) {
    __ cmpptr(new_val, NULL_WORD);  // new value == null?
    __ jcc(Assembler::equal, done);
    __ testptr(new_val, new_val);   // new value == null?
    __ jccb(Assembler::equal, L_done);
  }

  __ movptr(tmp1, store_addr);  // tmp1 := store address
@@ -298,20 +298,19 @@ static void generate_post_barrier(MacroAssembler* masm,
  __ addptr(tmp1, card_table_addr);  // tmp1 := card address
  if (UseCondCardMark) {
    __ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val());  // *(card address) == clean_card_val?
    __ jcc(Assembler::notEqual, done);
    __ jccb(Assembler::notEqual, L_done);
  }
  // Storing a region crossing, non-null oop, card is clean.
  // Dirty card.
  __ movb(Address(tmp1, 0), G1CardTable::dirty_card_val());  // *(card address) := dirty_card_val
  __ bind(L_done);
}

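Stripped of the assembler plumbing, the post-barrier logic above reads roughly like this (constants and globals here are assumed for illustration; the real values come from G1CardTable and G1HeapRegion):

  #include <cstdint>

  const int      kLogRegionBytes = 21;    // illustrative log2 of region size
  const int      kCardShift      = 9;     // illustrative 512-byte cards
  const uint8_t  kCleanCard      = 0xff;  // illustrative clean marker
  const uint8_t  kDirtyCard      = 0;     // illustrative dirty marker
  extern uint8_t* card_table_base;        // assumed global for the sketch

  void post_barrier(uintptr_t store_addr, uintptr_t new_val, bool cond_card_mark) {
    if (((store_addr ^ new_val) >> kLogRegionBytes) == 0) return;  // same region
    if (new_val == 0) return;                                      // storing null
    uint8_t* card = card_table_base + (store_addr >> kCardShift);
    if (cond_card_mark && *card != kCleanCard) return;             // already dirty
    *card = kDirtyCard;                                            // dirty the card
  }
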
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register store_addr,
                                                  Register new_val,
                                                  Register tmp) {
  Label done;
  generate_post_barrier(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
  __ bind(done);
  generate_post_barrier(masm, store_addr, new_val, tmp, true /* new_val_may_be_null */);
}

#if defined(COMPILER2)
@@ -354,7 +353,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,
void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
                                                         G1PreBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register obj = stub->obj();
  Register pre_val = stub->pre_val();
  Register thread = stub->thread();
@@ -362,9 +360,8 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
  assert(stub->tmp2() == noreg, "not needed in this platform");

  __ bind(*stub->entry());
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime);
  generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation());

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry));
  __ jmp(*stub->continuation());
}
@@ -374,9 +371,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                     Register new_val,
                                                     Register tmp,
                                                     bool new_val_may_be_null) {
  Label done;
  generate_post_barrier(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
  __ bind(done);
  generate_post_barrier(masm, store_addr, new_val, tmp, new_val_may_be_null);
}

#endif // COMPILER2
@@ -449,7 +444,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/);
  }

  __ cmpptr(pre_val_reg, NULL_WORD);
  __ testptr(pre_val_reg, pre_val_reg);
  __ jcc(Assembler::equal, *stub->continuation());
  ce->store_parameter(stub->pre_val()->as_register(), 0);
  __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
@@ -465,9 +460,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2 /* unused on x86 */) {
  Label done;
  generate_post_barrier(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
  masm->bind(done);
  generate_post_barrier(masm, store_addr, new_val, tmp1, true /* new_val_may_be_null */);
}

#define __ sasm->
@@ -490,8 +483,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  Label done;
  Label runtime;
  Label L_done, L_runtime;

  // Is marking still active?
  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
@@ -500,13 +492,13 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
    __ cmpb(queue_active, 0);
  }
  __ jcc(Assembler::equal, done);
  __ jcc(Assembler::equal, L_done);

  // Can we store original value in the thread's buffer?

  __ movptr(tmp, queue_index);
  __ testptr(tmp, tmp);
  __ jcc(Assembler::zero, runtime);
  __ jccb(Assembler::zero, L_runtime);
  __ subptr(tmp, wordSize);
  __ movptr(queue_index, tmp);
  __ addptr(tmp, buffer);
@@ -514,9 +506,9 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
  // prev_val (rax)
  __ load_parameter(0, pre_val);
  __ movptr(Address(tmp, 0), pre_val);
  __ jmp(done);
  __ jmp(L_done);

  __ bind(runtime);
  __ bind(L_runtime);

  __ push_call_clobbered_registers();

@@ -526,7 +518,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*

  __ pop_call_clobbered_registers();

  __ bind(done);
  __ bind(L_done);

  __ pop_ppx(rdx);
  __ pop_ppx(rax);

@@ -3524,10 +3524,10 @@ void StubGenerator::aesgcm_avx512(Register in, Register len, Register ct, Regist
                 false, true, false, false, false, ghashin_offset, aesout_offset, HashKey_32);

  ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
  __ addl(pos, 16 * 16);

  __ bind(MESG_BELOW_32_BLKS);
  __ subl(len, 16 * 16);
  __ addl(pos, 16 * 16);
  gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);

  __ bind(GHASH_DONE);
@@ -4016,13 +4016,15 @@ void StubGenerator::aesgcm_avx2(Register in, Register len, Register ct, Register
  const Register rounds = r10;
  const XMMRegister ctr_blockx = xmm9;
  const XMMRegister aad_hashx = xmm8;
  Label encrypt_done, encrypt_by_8_new, encrypt_by_8;
  Label encrypt_done, encrypt_by_8_new, encrypt_by_8, exit;

  // This routine should be called only for message sizes of 128 bytes or more.
  // Macro flow:
  // process 8 16 byte blocks in initial_num_blocks.
  // process 8 16 byte blocks at a time until all are done 'encrypt_by_8_new followed by ghash_last_8'
  __ xorl(pos, pos);
  __ cmpl(len, 128);
  __ jcc(Assembler::less, exit);

  // Generate 8 constants for htbl
  generateHtbl_8_block_avx2(subkeyHtbl);
@@ -4090,6 +4092,7 @@ void StubGenerator::aesgcm_avx2(Register in, Register len, Register ct, Register
  __ vpxor(xmm0, xmm0, xmm0, Assembler::AVX_128bit);
  __ vpxor(xmm13, xmm13, xmm13, Assembler::AVX_128bit);

  __ bind(exit);
}

#undef __

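The new exit label turns a documented caller-side precondition into a local guard. Schematically (illustrative C++, not the stub generator):

  #include <cstddef>
  #include <cstdint>

  // The AVX2 GCM path works in eight 16-byte blocks (128 bytes) at a time,
  // so inputs shorter than 128 bytes now fall through to 'exit' untouched.
  void aesgcm_like(const uint8_t* in, size_t len) {
    (void)in;  // unused in this sketch
    size_t pos = 0;
    if (len < 128) {
      return;  // corresponds to: cmpl(len, 128); jcc(less, exit)
    }
    while (len - pos >= 128) {
      // ... encrypt and GHASH eight 16-byte blocks ...
      pos += 128;
    }
  }
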
@@ -2633,6 +2633,70 @@ bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}

static bool is_ndd_demotable(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
}

static bool is_ndd_demotable_commutative(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
}

static bool is_demotion_candidate(const MachNode* mdef) {
  return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
}

bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
                                            int oper_index) {
  if (mdef == nullptr) {
    return false;
  }

  if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
      mdef->in(mdef->operand_index(oper_index)) == nullptr) {
    assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
    assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
    return false;
  }

  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
  if (mdef->operand_num_edges(oper_index) != 1) {
    assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
    return false;
  }

  // A demotion candidate must be register-mask compatible with the definition.
  const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
  if (!oper_mask.overlap(mdef->out_RegMask())) {
    assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
    return false;
  }

  switch (oper_index) {
    // The first operand of a MachNode matching an Intel APX NDD selection
    // pattern can share its assigned register with the definition operand if
    // their live ranges do not overlap. In such a scenario we can demote the
    // instruction to a legacy map0/map1 instruction by replacing its 4-byte
    // extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
    // candidates are decorated with a special flag by the instruction selector.
    case 1:
      return is_demotion_candidate(mdef);

    // The definition operand of a commutative operation can also be biased
    // towards the second operand.
    case 2:
      return is_ndd_demotable_commutative(mdef);

    // The current scheme only selects up to two biasing candidates.
    default:
      assert(false, "unhandled operand index: %s", mdef->Name());
      break;
  }

  return false;
}

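Concretely, a hypothetical instance of the demotion this enables (registers chosen for illustration, not taken from the patch):

  // NDD (new data destination) form, carrying a 4-byte EVEX prefix; dst,
  // src1 and src2 may all be different registers:
  //   eaddl r10d, r10d, r8d
  // Once biasing assigns dst == src1, the assembler can demote this to the
  // legacy two-operand form with a shorter REX/REX2 encoding:
  //   addl r10d, r8d
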
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  assert(EnableVectorSupport, "sanity");
  int lo = XMM0_num;
@@ -2812,7 +2876,7 @@ static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_byte

class Node::PD {
public:
  enum NodeFlags {
  enum NodeFlags : uint64_t {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    Flag_sets_carry_flag = Node::_last_flag << 2,
    Flag_sets_parity_flag = Node::_last_flag << 3,
@@ -2824,7 +2888,9 @@ public:
    Flag_clears_zero_flag = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag = Node::_last_flag << 11,
    _last_flag = Flag_clears_sign_flag
    Flag_ndd_demotable = Node::_last_flag << 12,
    Flag_ndd_demotable_commutative = Node::_last_flag << 13,
    _last_flag = Flag_ndd_demotable_commutative
  };
};

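The switch to an explicit uint64_t underlying type is what makes room for the two new bits: the shifts now reach past bit 31. A self-contained illustration (the flag positions here are made up; only the overflow behavior matters):

  #include <cstdint>

  // If the last shared flag were already bit 20, shifting 13 more places
  // lands on bit 33, which overflows a 32-bit enum; a 64-bit underlying
  // type does not.
  enum WideFlags : uint64_t {
    last_shared_flag               = uint64_t(1) << 20,
    flag_ndd_demotable             = last_shared_flag << 12,  // bit 32
    flag_ndd_demotable_commutative = last_shared_flag << 13,  // bit 33
  };
  static_assert(flag_ndd_demotable_commutative > UINT32_MAX, "needs 64 bits");
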
@@ -9801,7 +9867,7 @@ instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -9829,7 +9895,7 @@ instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -9872,7 +9938,7 @@ instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
@@ -9929,6 +9995,7 @@ instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
@@ -9983,6 +10050,7 @@ instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
@@ -10089,7 +10157,7 @@ instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -10117,7 +10185,7 @@ instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -10160,7 +10228,7 @@ instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
@@ -10216,6 +10284,7 @@ instruct incL_rReg_ndd(rRegL dst, rRegI src, immL1 val, rFlagsReg cr)
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
@@ -10270,6 +10339,7 @@ instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
@@ -10984,7 +11054,7 @@ instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -10998,7 +11068,7 @@ instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -11041,7 +11111,7 @@ instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
@@ -11099,7 +11169,7 @@ instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -11113,7 +11183,7 @@ instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -11156,7 +11226,7 @@ instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
@@ -11228,7 +11298,7 @@ instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
@@ -11256,7 +11326,7 @@ instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
@@ -11297,7 +11367,7 @@ instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
@@ -11325,7 +11395,7 @@ instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
@@ -11370,6 +11440,7 @@ instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_commutative);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@@ -11411,6 +11482,7 @@ instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (MulI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  ins_cost(350);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
@@ -11462,6 +11534,7 @@ instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_commutative);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
@@ -11503,6 +11576,7 @@ instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (MulL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_commutative);

  ins_cost(350);
  format %{ "eimulq $dst, $src1, $src2 \t# long" %}
@@ -11777,6 +11851,7 @@ instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
@@ -11805,6 +11880,7 @@ instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
@@ -11911,6 +11987,7 @@ instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
@@ -12017,6 +12094,7 @@ instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
@@ -12124,6 +12202,7 @@ instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
@@ -12152,6 +12231,7 @@ instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
@@ -12258,6 +12338,7 @@ instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
@@ -12364,6 +12445,7 @@ instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
@@ -12535,6 +12617,7 @@ instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
@@ -12599,6 +12682,7 @@ instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
@@ -12651,6 +12735,7 @@ instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
@@ -12664,6 +12749,7 @@ instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
@@ -12728,6 +12814,7 @@ instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable);

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
@@ -12805,7 +12892,7 @@ instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -12898,7 +12985,7 @@ instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -12942,7 +13029,7 @@ instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AndI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
@@ -13142,7 +13229,7 @@ instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -13171,7 +13258,7 @@ instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -13185,7 +13272,7 @@ instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
@@ -13229,7 +13316,7 @@ instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  ins_cost(150);
  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
@@ -13305,7 +13392,7 @@ instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -13331,6 +13418,7 @@ instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  match(Set dst (XorI src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
@@ -13361,7 +13449,7 @@ instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
@@ -13407,7 +13495,7 @@ instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (XorI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  ins_cost(150);
  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
@@ -13486,7 +13574,7 @@ instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -13542,7 +13630,7 @@ instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -13586,7 +13674,7 @@ instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
@@ -13789,7 +13877,7 @@ instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -13844,7 +13932,7 @@ instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -13858,7 +13946,7 @@ instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
@@ -13903,7 +13991,7 @@ instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (OrL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
@@ -13982,7 +14070,7 @@ instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -14008,6 +14096,7 @@ instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
@@ -14038,7 +14127,7 @@ instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
@@ -14084,7 +14173,7 @@ instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr
  predicate(UseAPX);
  match(Set dst (XorL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
@@ -16539,6 +16628,7 @@ instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable);

  ins_cost(200);
  expand %{
@@ -16590,6 +16680,7 @@ instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable);

  ins_cost(200);
  expand %{

@@ -1038,6 +1038,8 @@ static void* dll_load_library(const char *filename, int *eno, char *ebuf, int eb
    dflags |= RTLD_MEMBER;
  }

  Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);

  void* result;
  const char* error_report = nullptr;
  JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)

@@ -1035,6 +1035,8 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebu
  int rtn = fegetenv(&default_fenv);
  assert(rtn == 0, "fegetenv must succeed");

  Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);

  void* result;
  JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
  result = ::dlopen(filename, RTLD_LAZY);

@@ -159,9 +159,7 @@ physical_memory_size_type os::Linux::_physical_memory = 0;
address os::Linux::_initial_thread_stack_bottom = nullptr;
uintptr_t os::Linux::_initial_thread_stack_size = 0;

int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = nullptr;
pthread_t os::Linux::_main_thread;
bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_libc_version = nullptr;
const char * os::Linux::_libpthread_version = nullptr;

@@ -1475,29 +1473,6 @@ void os::Linux::capture_initial_stack(size_t max_size) {
////////////////////////////////////////////////////////////////////////////////
// time support

void os::Linux::fast_thread_clock_init() {
  clockid_t clockid;
  struct timespec tp;
  int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
      (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");

  // Switch to using fast clocks for thread cpu time if
  // clock_getres() returns a 0 error code.
  // Note that some kernels may support the current thread
  // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
  // returned by pthread_getcpuclockid().
  // If the fast POSIX clocks are supported then clock_getres()
  // must return at least tp.tv_sec == 0, which means a resolution
  // better than 1 sec. This is an extra check for reliability.

  if (pthread_getcpuclockid_func &&
      pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
      clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
    _supports_fast_thread_cpu_time = true;
    _pthread_getcpuclockid = pthread_getcpuclockid_func;
  }
}

// thread_id is kernel thread id (similar to Solaris LWP id)
|
||||
intx os::current_thread_id() { return os::Linux::gettid(); }
|
||||
int os::current_process_id() {
|
||||
@ -1900,6 +1875,8 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
|
||||
assert(rtn == 0, "fegetenv must succeed");
|
||||
#endif // IA32
|
||||
|
||||
Events::log_dll_message(nullptr, "Attempting to load shared library %s", filename);
|
||||
|
||||
void* result;
|
||||
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
|
||||
result = ::dlopen(filename, RTLD_LAZY);
|
||||
@ -4328,7 +4305,7 @@ OSReturn os::get_native_priority(const Thread* const thread,
|
||||
// For reference, please, see IEEE Std 1003.1-2004:
|
||||
// http://www.unix.org/single_unix_specification
|
||||
|
||||
jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
|
||||
jlong os::Linux::total_thread_cpu_time(clockid_t clockid) {
|
||||
struct timespec tp;
|
||||
int status = clock_gettime(clockid, &tp);
|
||||
assert(status == 0, "clock_gettime error: %s", os::strerror(errno));
|
||||
@ -4556,8 +4533,6 @@ jint os::init_2(void) {
|
||||
|
||||
os::Posix::init_2();
|
||||
|
||||
Linux::fast_thread_clock_init();
|
||||
|
||||
if (PosixSignals::init() == JNI_ERR) {
|
||||
return JNI_ERR;
|
||||
}
|
||||
@ -4985,14 +4960,14 @@ int os::open(const char *path, int oflag, int mode) {
|
||||
return fd;
|
||||
}
|
||||
|
||||
static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time);
|
||||
static jlong user_thread_cpu_time(Thread *thread);
|
||||
|
||||
static jlong fast_cpu_time(Thread *thread) {
|
||||
static jlong total_thread_cpu_time(Thread *thread) {
|
||||
clockid_t clockid;
|
||||
int rc = os::Linux::pthread_getcpuclockid(thread->osthread()->pthread_id(),
|
||||
int rc = pthread_getcpuclockid(thread->osthread()->pthread_id(),
|
||||
&clockid);
|
||||
if (rc == 0) {
|
||||
return os::Linux::fast_thread_cpu_time(clockid);
|
||||
return os::Linux::total_thread_cpu_time(clockid);
|
||||
} else {
|
||||
// It's possible to encounter a terminated native thread that failed
|
||||
// to detach itself from the VM - which should result in ESRCH.
|
||||
@ -5009,41 +4984,31 @@ static jlong fast_cpu_time(Thread *thread) {
|
||||
// the fast estimate available on the platform.
|
||||
|
||||
jlong os::current_thread_cpu_time() {
|
||||
if (os::Linux::supports_fast_thread_cpu_time()) {
|
||||
return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
|
||||
} else {
|
||||
// return user + sys since the cost is the same
|
||||
return slow_thread_cpu_time(Thread::current(), true /* user + sys */);
|
||||
}
|
||||
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
|
||||
}
|
||||
|
||||
jlong os::thread_cpu_time(Thread* thread) {
|
||||
// consistent with what current_thread_cpu_time() returns
|
||||
if (os::Linux::supports_fast_thread_cpu_time()) {
|
||||
return fast_cpu_time(thread);
|
||||
} else {
|
||||
return slow_thread_cpu_time(thread, true /* user + sys */);
|
||||
}
|
||||
return total_thread_cpu_time(thread);
|
||||
}
|
||||
|
||||
jlong os::current_thread_cpu_time(bool user_sys_cpu_time) {
|
||||
if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
|
||||
return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
|
||||
if (user_sys_cpu_time) {
|
||||
return os::Linux::total_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID);
|
||||
} else {
|
||||
return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time);
|
||||
return user_thread_cpu_time(Thread::current());
|
||||
}
|
||||
}
|
||||
|
||||
jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
|
||||
if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) {
|
||||
return fast_cpu_time(thread);
|
||||
if (user_sys_cpu_time) {
|
||||
return total_thread_cpu_time(thread);
|
||||
} else {
|
||||
return slow_thread_cpu_time(thread, user_sys_cpu_time);
|
||||
return user_thread_cpu_time(thread);
|
||||
}
|
||||
}
|
||||
|
||||
// -1 on error.
|
||||
static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
|
||||
static jlong user_thread_cpu_time(Thread *thread) {
|
||||
pid_t tid = thread->osthread()->thread_id();
|
||||
char *s;
|
||||
char stat[2048];
|
||||
@ -5080,11 +5045,8 @@ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
|
||||
&ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
|
||||
&user_time, &sys_time);
|
||||
if (count != 13) return -1;
|
||||
if (user_sys_cpu_time) {
|
||||
return ((jlong)sys_time + (jlong)user_time) * (1000000000 / os::Posix::clock_tics_per_second());
|
||||
} else {
|
||||
return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
|
||||
}
|
||||
|
||||
return (jlong)user_time * (1000000000 / os::Posix::clock_tics_per_second());
|
||||
}
|
||||
|
||||
void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) {
|
||||
@ -5163,7 +5125,7 @@ int os::get_core_path(char* buffer, size_t bufferSize) {
|
||||
|
||||
if (core_pattern[0] == '|') {
|
||||
written = jio_snprintf(buffer, bufferSize,
|
||||
"\"%s\" (or dumping to %s/core.%d)",
|
||||
"\"%s\" (alternatively, falling back to %s/core.%d)",
|
||||
&core_pattern[1], p, current_process_id());
|
||||
} else if (pid_pos != nullptr) {
|
||||
*pid_pos = '\0';
|
||||
|
||||
@ -32,16 +32,12 @@
|
||||
class os::Linux {
|
||||
friend class os;
|
||||
|
||||
static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
|
||||
|
||||
static address _initial_thread_stack_bottom;
|
||||
static uintptr_t _initial_thread_stack_size;
|
||||
|
||||
static const char *_libc_version;
|
||||
static const char *_libpthread_version;
|
||||
|
||||
static bool _supports_fast_thread_cpu_time;
|
||||
|
||||
static GrowableArray<int>* _cpu_to_node;
|
||||
static GrowableArray<int>* _nindex_to_node;
|
||||
|
||||
@ -146,18 +142,7 @@ class os::Linux {
|
||||
static bool manually_expand_stack(JavaThread * t, address addr);
|
||||
static void expand_stack_to(address bottom);
|
||||
|
||||
// fast POSIX clocks support
|
||||
static void fast_thread_clock_init(void);
|
||||
|
||||
static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
|
||||
return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
|
||||
}
|
||||
|
||||
static bool supports_fast_thread_cpu_time() {
|
||||
return _supports_fast_thread_cpu_time;
|
||||
}
|
||||
|
||||
static jlong fast_thread_cpu_time(clockid_t clockid);
|
||||
static jlong total_thread_cpu_time(clockid_t clockid);
|
||||
|
||||
static jlong sendfile(int out_fd, int in_fd, jlong* offset, jlong count);
|
||||
|
||||
|
||||
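
With fast_thread_clock_init() and the dlsym() probe removed, the Linux code now assumes pthread_getcpuclockid(3) and clock_gettime(2) are always available. A self-contained sketch of the remaining mechanism (illustrative only, not JDK code):

    #include <pthread.h>
    #include <time.h>
    #include <stdint.h>

    // Total (user + sys) CPU time consumed by a thread, in nanoseconds.
    // Returns -1 if the thread's CPU clock cannot be queried; a thread
    // that has already terminated yields ESRCH, for example.
    static int64_t thread_cpu_time_ns(pthread_t thread) {
      clockid_t clockid;
      if (pthread_getcpuclockid(thread, &clockid) != 0) {
        return -1;
      }
      struct timespec tp;
      if (clock_gettime(clockid, &tp) != 0) {
        return -1;
      }
      return (int64_t)tp.tv_sec * 1000000000 + tp.tv_nsec;
    }
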
@ -50,7 +50,14 @@ ProcSmapsParser::~ProcSmapsParser() {

bool ProcSmapsParser::read_line() {
_line[0] = '\0';
return ::fgets(_line, _linelen, _f) != nullptr;

if (::fgets(_line, _linelen, _f) == nullptr) {
// On error or EOF, ensure deterministic empty buffer
_line[0] = '\0';
return false;
} else {
return true;
}
}

bool ProcSmapsParser::is_header_line() {
@ -101,8 +108,6 @@ void ProcSmapsParser::scan_additional_line(ProcSmapsInfo& out) {
}
}

// Starts or continues parsing. Returns true on success,
// false on EOF or on error.
bool ProcSmapsParser::parse_next(ProcSmapsInfo& out) {

// Information about a single mapping reaches across several lines.
@ -117,15 +122,13 @@ bool ProcSmapsParser::parse_next(ProcSmapsInfo& out) {
assert(is_header_line(), "Not a header line: \"%s\".", _line);
scan_header_line(out);

// Now read until we encounter the next header line or EOF or an error.
bool ok = false, stop = false;
do {
ok = read_line();
stop = !ok || is_header_line();
if (!stop) {
scan_additional_line(out);
while (true) {
bool ok = read_line();
if (!ok || is_header_line()) {
break; // EOF or next header
}
} while (!stop);
scan_additional_line(out);
}

return ok;
return true; // always return true if a mapping was parsed
}

@ -84,8 +84,7 @@ public:
ProcSmapsParser(FILE* f);
~ProcSmapsParser();

// Starts or continues parsing. Returns true on success,
// false on EOF or on error.
// Starts or continues parsing. Returns true iff a mapping was parsed.
bool parse_next(ProcSmapsInfo& out);
};
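
The reworked parse_next() consumes one mapping's continuation lines until the next header or EOF and then reports success unconditionally, since a mapping was scanned either way. A stripped-down model of the control flow (the "header starts without a space" test is an illustrative stand-in for the real smaps check):

    #include <cstdio>

    // One header line has already been read and scanned. Consume
    // continuation lines until EOF or the next mapping's header.
    static bool parse_next(FILE* f, char* line, int linelen) {
      while (true) {
        line[0] = '\0';
        if (::fgets(line, linelen, f) == nullptr) {
          break;                    // EOF or read error
        }
        if (line[0] != ' ') {       // stand-in for is_header_line()
          break;                    // next mapping starts here
        }
        // ... scan_additional_line(line) would accumulate attributes ...
      }
      return true;  // a mapping was parsed either way
    }
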
@ -108,41 +108,60 @@ size_t os::_os_min_stack_allowed = PTHREAD_STACK_MIN;

// Check core dump limit and report possible place where core can be found
void os::check_core_dump_prerequisites(char* buffer, size_t bufferSize, bool check_only) {
stringStream buf(buffer, bufferSize);
if (!FLAG_IS_DEFAULT(CreateCoredumpOnCrash) && !CreateCoredumpOnCrash) {
jio_snprintf(buffer, bufferSize, "CreateCoredumpOnCrash is disabled from command line");
VMError::record_coredump_status(buffer, false);
buf.print("CreateCoredumpOnCrash is disabled from command line");
VMError::record_coredump_status(buf.freeze(), false);
} else {
struct rlimit rlim;
bool success = true;
bool warn = true;
char core_path[PATH_MAX];
if (get_core_path(core_path, PATH_MAX) <= 0) {
jio_snprintf(buffer, bufferSize, "core.%d (may not exist)", current_process_id());
// In the warning message, let the user know.
if (check_only) {
buf.print("the core path couldn't be determined. It commonly defaults to ");
}
buf.print("core.%d%s", current_process_id(), check_only ? "" : " (may not exist)");
#ifdef LINUX
} else if (core_path[0] == '"') { // redirect to user process
jio_snprintf(buffer, bufferSize, "Core dumps may be processed with %s", core_path);
if (check_only) {
buf.print("core dumps may be further processed by the following: ");
} else {
buf.print("Determined by the following: ");
}
buf.print("%s", core_path);
#endif
} else if (getrlimit(RLIMIT_CORE, &rlim) != 0) {
jio_snprintf(buffer, bufferSize, "%s (may not exist)", core_path);
if (check_only) {
buf.print("the rlimit couldn't be determined. If resource limits permit, the core dump will be located at ");
}
buf.print("%s%s", core_path, check_only ? "" : " (may not exist)");
} else {
switch(rlim.rlim_cur) {
case RLIM_INFINITY:
jio_snprintf(buffer, bufferSize, "%s", core_path);
buf.print("%s", core_path);
warn = false;
break;
case 0:
jio_snprintf(buffer, bufferSize, "Core dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again");
buf.print("%s dumps have been disabled. To enable core dumping, try \"ulimit -c unlimited\" before starting Java again", check_only ? "core" : "Core");
success = false;
break;
default:
jio_snprintf(buffer, bufferSize, "%s (max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", core_path, uint64_t(rlim.rlim_cur) / K);
if (check_only) {
buf.print("core dumps are constrained ");
} else {
buf.print("%s ", core_path);
}
buf.print("(max size " UINT64_FORMAT " k). To ensure a full core dump, try \"ulimit -c unlimited\" before starting Java again", uint64_t(rlim.rlim_cur) / K);
break;
}
}
const char* result = buf.freeze();
if (!check_only) {
VMError::record_coredump_status(buffer, success);
VMError::record_coredump_status(result, success);
} else if (warn) {
warning("CreateCoredumpOnCrash specified, but %s", buffer);
warning("CreateCoredumpOnCrash specified, but %s", result);
}
}
}
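
The rewrite above swaps repeated jio_snprintf() calls for a stringStream over the caller's buffer, so the message is assembled piecewise and frozen once at the end. The same incremental-build pattern over a fixed buffer, sketched with vsnprintf (not the HotSpot stringStream API):

    #include <cstdarg>
    #include <cstdio>

    // Appends formatted text to a fixed caller-supplied buffer, like
    // stringStream::print(); freeze() yields the accumulated C string.
    class FixedStream {
      char*  _buf;
      size_t _cap;
      size_t _pos = 0;
     public:
      FixedStream(char* buf, size_t cap) : _buf(buf), _cap(cap) {
        if (_cap > 0) _buf[0] = '\0';
      }
      void print(const char* fmt, ...) {
        if (_pos + 1 >= _cap) return;            // already full
        va_list ap;
        va_start(ap, fmt);
        int n = vsnprintf(_buf + _pos, _cap - _pos, fmt, ap);
        va_end(ap);
        if (n > 0) {
          size_t rem = _cap - _pos;
          _pos += ((size_t)n < rem) ? (size_t)n : rem - 1;  // clamp on truncation
        }
      }
      const char* freeze() const { return _buf; }
    };
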
@ -1715,6 +1715,8 @@ static int _print_module(const char* fname, address base_address,
// same architecture as Hotspot is running on
void * os::dll_load(const char *name, char *ebuf, int ebuflen) {
log_info(os)("attempting shared library load of %s", name);
Events::log_dll_message(nullptr, "Attempting to load shared library %s", name);

void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(name, &result);)
result = LoadLibrary(name);

@ -50,11 +50,9 @@ double SharedRuntime::fmod_winx64(double x, double y)
hx ^= sx; /* |x| */
hy &= 0x7fffffff; /* |y| */

#pragma warning( disable : 4146 )
/* purge off exception values */
if ((hy | ly) == 0 || (hx >= 0x7ff00000) || /* y=0,or x not finite */
((hy | ((ly | -ly) >> 31))>0x7ff00000)) /* or y is NaN */
#pragma warning( default : 4146 )
((hy | ((ly | -ly) >> 31))>0x7ff00000)) /* or y is NaN */
return (x*y) / (x*y);
if (hx <= hy) {
if ((hx<hy) || (lx<ly)) return x; /* |x|<|y| return x */

@ -52,12 +52,16 @@ struct AtomicAccess::PlatformAdd {
}
};

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<size_t byte_size>
template<typename T>
inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(byte_size == sizeof(T));
STATIC_ASSERT(byte_size == 4 || byte_size == 8);
T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
FULL_MEM_BARRIER;
return res;

@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -66,6 +66,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -113,6 +113,9 @@ inline D AtomicAccess::PlatformAdd<8>::fetch_then_add(D volatile* dest, I add_va
return atomic_fastcall(stub, dest, add_value);
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -118,6 +118,8 @@ inline D AtomicAccess::PlatformAdd<4>::add_then_fetch(D volatile* dest, I add_va
return add_using_helper<int32_t>(ARMAtomicFuncs::_add_func, dest, add_value);
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>

@ -152,6 +152,9 @@ inline T AtomicAccess::PlatformCmpxchg<4>::operator()(T volatile* dest __attribu
}
#endif

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<size_t byte_size>
template<typename T>
inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
@ -164,6 +167,7 @@ inline T AtomicAccess::PlatformXchg<byte_size>::operator()(T volatile* dest,
#endif

STATIC_ASSERT(byte_size == sizeof(T));
STATIC_ASSERT(byte_size == 4 || byte_size == 8);

if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;

@ -209,6 +209,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I inc,
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -52,6 +52,9 @@ inline D AtomicAccess::PlatformAdd<4>::fetch_then_add(D volatile* dest, I add_va
return old_value;
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -65,6 +65,9 @@ inline D AtomicAccess::PlatformAdd<8>::add_then_fetch(D volatile* dest, I add_va
return res;
}

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

template<>
template<typename T>
inline T AtomicAccess::PlatformXchg<4>::operator()(T volatile* dest,

@ -68,6 +68,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)

#undef DEFINE_INTRINSIC_ADD

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template<typename T> \
@ -75,6 +78,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast<T>( \
IntrinsicName(reinterpret_cast<IntrinsicType volatile *>(dest), \
PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \

@ -70,6 +70,9 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)

#undef DEFINE_INTRINSIC_ADD

template<>
struct AtomicAccess::PlatformXchg<1> : AtomicAccess::XchgUsingCmpxchg<1> {};

#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType) \
template<> \
template<typename T> \
@ -77,6 +80,8 @@ DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
T exchange_value, \
atomic_memory_order order) const { \
STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T)); \
STATIC_ASSERT(sizeof(IntrinsicType) == 4 || \
sizeof(IntrinsicType) == 8); \
return PrimitiveConversions::cast<T>( \
IntrinsicName(reinterpret_cast<IntrinsicType volatile *>(dest), \
PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \
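
Every one of these platform headers adds the same specialization: 1-byte PlatformXchg delegates to XchgUsingCmpxchg<1>, which emulates an atomic exchange with a compare-exchange retry loop. The shape of that fallback, sketched with C++11 atomics (the JDK template itself lives in shared code):

    #include <atomic>

    // Atomic exchange emulated by a CAS loop: retry until our value is
    // installed over whatever value was current, then return that value.
    template <typename T>
    T xchg_using_cmpxchg(std::atomic<T>& dest, T exchange_value) {
      T old_value = dest.load(std::memory_order_relaxed);
      while (!dest.compare_exchange_weak(old_value, exchange_value,
                                         std::memory_order_seq_cst,
                                         std::memory_order_relaxed)) {
        // compare_exchange_weak refreshed old_value; retry.
      }
      return old_value;
    }
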
@ -73,7 +73,7 @@ void ResolutionErrorTable::add_entry(const constantPoolHandle& pool, int cp_inde

ResolutionErrorKey key(pool(), cp_index);
ResolutionErrorEntry *entry = new ResolutionErrorEntry(error, message, cause, cause_msg);
_resolution_error_table->put(key, entry);
_resolution_error_table->put_when_absent(key, entry);
}

// create new nest host error entry
@ -85,7 +85,7 @@ void ResolutionErrorTable::add_entry(const constantPoolHandle& pool, int cp_inde

ResolutionErrorKey key(pool(), cp_index);
ResolutionErrorEntry *entry = new ResolutionErrorEntry(message);
_resolution_error_table->put(key, entry);
_resolution_error_table->put_when_absent(key, entry);
}

// find entry in the table
@ -126,6 +126,15 @@ ResolutionErrorEntry::~ResolutionErrorEntry() {
}
}

void ResolutionErrorEntry::set_nest_host_error(const char* message) {
// If a message is already set, free it.
if (nest_host_error() != nullptr) {
FREE_C_HEAP_ARRAY(char, _nest_host_error);
}
_nest_host_error = message;
}

class ResolutionErrorDeleteIterate : StackObj {
ConstantPool* p;

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -91,10 +91,7 @@ class ResolutionErrorEntry : public CHeapObj<mtClass> {
~ResolutionErrorEntry();

// The incoming nest host error message is already in the C-Heap.
void set_nest_host_error(const char* message) {
_nest_host_error = message;
}

void set_nest_host_error(const char* message);

Symbol* error() const { return _error; }
const char* message() const { return _message; }

@ -1864,14 +1864,19 @@ void SystemDictionary::add_nest_host_error(const constantPoolHandle& pool,
{
MutexLocker ml(Thread::current(), SystemDictionary_lock);
ResolutionErrorEntry* entry = ResolutionErrorTable::find_entry(pool, which);
if (entry != nullptr && entry->nest_host_error() == nullptr) {
if (entry == nullptr) {
// Only add a new entry to the resolution error table if one hasn't been found for this
// constant pool index. In this case resolution succeeded but there's an error in this nest host
// that we use the table to record.
assert(pool->resolved_klass_at(which) != nullptr, "klass should be resolved if there is no entry");
ResolutionErrorTable::add_entry(pool, which, message);
} else {
// An existing entry means we had a true resolution failure (LinkageError) with our nest host, but we
// still want to add the error message for the higher-level access checks to report. We should
// only reach here under the same error condition, so we can ignore the potential race with setting
// the message. If we see it is already set then we can ignore it.
// the message, and set it again.
assert(entry->nest_host_error() == nullptr || strcmp(entry->nest_host_error(), message) == 0, "should be the same message");
entry->set_nest_host_error(message);
} else {
ResolutionErrorTable::add_entry(pool, which, message);
}
}
}
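
Two related invariants change here: plain put() becomes put_when_absent(), and set_nest_host_error() moves out of line so it can release a message that was already set, since the nest-host path may now legitimately store the (asserted-equal) message twice. A minimal model of the setter's ownership rule (plain C heap calls instead of the JDK FREE_C_HEAP_ARRAY macro):

    #include <cstdlib>

    // The entry owns its C-heap message; a second call releases the old
    // string before taking ownership of the new one.
    struct ErrorEntry {
      const char* _nest_host_error = nullptr;

      void set_nest_host_error(const char* message) {
        if (_nest_host_error != nullptr) {
          std::free(const_cast<char*>(_nest_host_error));
        }
        _nest_host_error = message;   // take ownership
      }

      ~ErrorEntry() { set_nest_host_error(nullptr); }
    };
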
@ -1010,8 +1010,10 @@ void CompilationMemoryStatistic::print_error_report(outputStream* st) {
|
||||
oom_stats->print_peak_state_on(st);
|
||||
st->cr();
|
||||
}
|
||||
st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
|
||||
print_all_by_size(st, false, false, 0, 10);
|
||||
if (Thread::current_or_null_safe() != nullptr) {
|
||||
st->print_cr("Compiler Memory Statistic, 10 most expensive compilations:");
|
||||
print_all_by_size(st, false, false, 0, 10);
|
||||
}
|
||||
}
|
||||
|
||||
void CompilationMemoryStatistic::print_final_report(outputStream* st) {
|
||||
|
||||
@ -33,10 +33,10 @@
|
||||
#include "utilities/align.hpp"
|
||||
|
||||
G1CollectedHeap* G1AllocRegion::_g1h = nullptr;
|
||||
G1HeapRegion* G1AllocRegion::_dummy_region = nullptr;
|
||||
Atomic<G1HeapRegion*> G1AllocRegion::_dummy_region;
|
||||
|
||||
void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
|
||||
assert(_dummy_region == nullptr, "should be set once");
|
||||
assert(_dummy_region.load_relaxed() == nullptr, "should be set once");
|
||||
assert(dummy_region != nullptr, "pre-condition");
|
||||
assert(dummy_region->free() == 0, "pre-condition");
|
||||
|
||||
@ -46,11 +46,11 @@ void G1AllocRegion::setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region) {
|
||||
assert(dummy_region->par_allocate(1, 1, &assert_tmp) == nullptr, "should fail");
|
||||
|
||||
_g1h = g1h;
|
||||
_dummy_region = dummy_region;
|
||||
_dummy_region.release_store(dummy_region);
|
||||
}
|
||||
|
||||
size_t G1AllocRegion::fill_up_remaining_space(G1HeapRegion* alloc_region) {
|
||||
assert(alloc_region != nullptr && alloc_region != _dummy_region,
|
||||
assert(alloc_region != nullptr && alloc_region != _dummy_region.load_relaxed(),
|
||||
"pre-condition");
|
||||
size_t result = 0;
|
||||
|
||||
@ -111,13 +111,13 @@ size_t G1AllocRegion::retire_internal(G1HeapRegion* alloc_region, bool fill_up)
|
||||
}
|
||||
|
||||
size_t G1AllocRegion::retire(bool fill_up) {
|
||||
assert_alloc_region(_alloc_region != nullptr, "not initialized properly");
|
||||
assert_alloc_region(_alloc_region.load_relaxed() != nullptr, "not initialized properly");
|
||||
|
||||
size_t waste = 0;
|
||||
|
||||
trace("retiring");
|
||||
G1HeapRegion* alloc_region = _alloc_region;
|
||||
if (alloc_region != _dummy_region) {
|
||||
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
|
||||
if (alloc_region != _dummy_region.load_relaxed()) {
|
||||
waste = retire_internal(alloc_region, fill_up);
|
||||
reset_alloc_region();
|
||||
}
|
||||
@ -127,7 +127,7 @@ size_t G1AllocRegion::retire(bool fill_up) {
|
||||
}
|
||||
|
||||
HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
|
||||
assert_alloc_region(_alloc_region == _dummy_region, "pre-condition");
|
||||
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "pre-condition");
|
||||
|
||||
trace("attempting region allocation");
|
||||
G1HeapRegion* new_alloc_region = allocate_new_region(word_size);
|
||||
@ -138,7 +138,6 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
|
||||
HeapWord* result = new_alloc_region->allocate(word_size);
|
||||
assert_alloc_region(result != nullptr, "the allocation should succeeded");
|
||||
|
||||
OrderAccess::storestore();
|
||||
// Note that we first perform the allocation and then we store the
|
||||
// region in _alloc_region. This is the reason why an active region
|
||||
// can never be empty.
|
||||
@ -154,16 +153,16 @@ HeapWord* G1AllocRegion::new_alloc_region_and_allocate(size_t word_size) {
|
||||
|
||||
void G1AllocRegion::init() {
|
||||
trace("initializing");
|
||||
assert_alloc_region(_alloc_region == nullptr, "pre-condition");
|
||||
assert_alloc_region(_dummy_region != nullptr, "should have been set");
|
||||
_alloc_region = _dummy_region;
|
||||
assert_alloc_region(_alloc_region.load_relaxed() == nullptr, "pre-condition");
|
||||
assert_alloc_region(_dummy_region.load_relaxed() != nullptr, "should have been set");
|
||||
_alloc_region.release_store(_dummy_region.load_relaxed());
|
||||
_count = 0;
|
||||
trace("initialized");
|
||||
}
|
||||
|
||||
void G1AllocRegion::set(G1HeapRegion* alloc_region) {
|
||||
trace("setting");
|
||||
assert_alloc_region(_alloc_region == _dummy_region && _count == 0, "pre-condition");
|
||||
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed() && _count == 0, "pre-condition");
|
||||
|
||||
update_alloc_region(alloc_region);
|
||||
trace("set");
|
||||
@ -175,19 +174,19 @@ void G1AllocRegion::update_alloc_region(G1HeapRegion* alloc_region) {
|
||||
// maintain the "the alloc region cannot be empty" invariant.
|
||||
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "pre-condition");
|
||||
|
||||
_alloc_region = alloc_region;
|
||||
_alloc_region.release_store(alloc_region);
|
||||
_count += 1;
|
||||
trace("updated");
|
||||
}
|
||||
|
||||
G1HeapRegion* G1AllocRegion::release() {
|
||||
trace("releasing");
|
||||
G1HeapRegion* alloc_region = _alloc_region;
|
||||
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
|
||||
retire(false /* fill_up */);
|
||||
assert_alloc_region(_alloc_region == _dummy_region, "post-condition of retire()");
|
||||
_alloc_region = nullptr;
|
||||
assert_alloc_region(_alloc_region.load_relaxed() == _dummy_region.load_relaxed(), "post-condition of retire()");
|
||||
_alloc_region.store_relaxed(nullptr);
|
||||
trace("released");
|
||||
return (alloc_region == _dummy_region) ? nullptr : alloc_region;
|
||||
return (alloc_region == _dummy_region.load_relaxed()) ? nullptr : alloc_region;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
@ -211,12 +210,13 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
|
||||
|
||||
out->print("%s: %u ", _name, _count);
|
||||
|
||||
if (_alloc_region == nullptr) {
|
||||
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
|
||||
if (alloc_region == nullptr) {
|
||||
out->print("null");
|
||||
} else if (_alloc_region == _dummy_region) {
|
||||
} else if (alloc_region == _dummy_region.load_relaxed()) {
|
||||
out->print("DUMMY");
|
||||
} else {
|
||||
out->print(HR_FORMAT, HR_FORMAT_PARAMS(_alloc_region));
|
||||
out->print(HR_FORMAT, HR_FORMAT_PARAMS(alloc_region));
|
||||
}
|
||||
|
||||
out->print(" : %s", str);
|
||||
@ -235,7 +235,7 @@ void G1AllocRegion::trace(const char* str, size_t min_word_size, size_t desired_
|
||||
#endif // PRODUCT
|
||||
|
||||
G1AllocRegion::G1AllocRegion(const char* name, uint node_index)
|
||||
: _alloc_region(nullptr),
|
||||
: _alloc_region(),
|
||||
_count(0),
|
||||
_name(name),
|
||||
_node_index(node_index)
|
||||
@ -250,7 +250,7 @@ void MutatorAllocRegion::retire_region(G1HeapRegion* alloc_region) {
|
||||
}
|
||||
|
||||
void MutatorAllocRegion::init() {
|
||||
assert(_retained_alloc_region == nullptr, "Pre-condition");
|
||||
assert(_retained_alloc_region.load_relaxed() == nullptr, "Pre-condition");
|
||||
G1AllocRegion::init();
|
||||
_wasted_bytes = 0;
|
||||
}
|
||||
@ -261,8 +261,9 @@ bool MutatorAllocRegion::should_retain(G1HeapRegion* region) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (_retained_alloc_region != nullptr &&
|
||||
free_bytes < _retained_alloc_region->free()) {
|
||||
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
|
||||
if (retained_alloc_region != nullptr &&
|
||||
free_bytes < retained_alloc_region->free()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -278,10 +279,11 @@ size_t MutatorAllocRegion::retire(bool fill_up) {
|
||||
// free than the currently retained region.
|
||||
if (should_retain(current_region)) {
|
||||
trace("mutator retained");
|
||||
if (_retained_alloc_region != nullptr) {
|
||||
waste = retire_internal(_retained_alloc_region, true);
|
||||
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
|
||||
if (retained_alloc_region != nullptr) {
|
||||
waste = retire_internal(retained_alloc_region, true);
|
||||
}
|
||||
_retained_alloc_region = current_region;
|
||||
_retained_alloc_region.release_store(current_region);
|
||||
} else {
|
||||
waste = retire_internal(current_region, fill_up);
|
||||
}
|
||||
@ -300,7 +302,7 @@ size_t MutatorAllocRegion::used_in_alloc_regions() {
|
||||
used += hr->used();
|
||||
}
|
||||
|
||||
hr = _retained_alloc_region;
|
||||
hr = _retained_alloc_region.load_acquire();
|
||||
if (hr != nullptr) {
|
||||
used += hr->used();
|
||||
}
|
||||
@ -313,9 +315,10 @@ G1HeapRegion* MutatorAllocRegion::release() {
|
||||
// The retained alloc region must be retired and this must be
|
||||
// done after the above call to release the mutator alloc region,
|
||||
// since it might update the _retained_alloc_region member.
|
||||
if (_retained_alloc_region != nullptr) {
|
||||
_wasted_bytes += retire_internal(_retained_alloc_region, false);
|
||||
_retained_alloc_region = nullptr;
|
||||
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
|
||||
if (retained_alloc_region != nullptr) {
|
||||
_wasted_bytes += retire_internal(retained_alloc_region, false);
|
||||
_retained_alloc_region.store_relaxed(nullptr);
|
||||
}
|
||||
log_debug(gc, alloc, region)("Mutator Allocation stats, regions: %u, wasted size: %zu%s (%4.1f%%)",
|
||||
count(),
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1HeapRegionAttr.hpp"
|
||||
#include "gc/g1/g1NUMA.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
|
||||
class G1CollectedHeap;
|
||||
|
||||
@ -40,8 +41,6 @@ class G1CollectedHeap;
|
||||
// replaced.
|
||||
|
||||
class G1AllocRegion : public CHeapObj<mtGC> {
|
||||
|
||||
private:
|
||||
// The active allocating region we are currently allocating out
|
||||
// of. The invariant is that if this object is initialized (i.e.,
|
||||
// init() has been called and release() has not) then _alloc_region
|
||||
@ -52,7 +51,7 @@ private:
|
||||
// then _alloc_region is null and this object should not be used to
|
||||
// satisfy allocation requests (it was done this way to force the
|
||||
// correct use of init() and release()).
|
||||
G1HeapRegion* volatile _alloc_region;
|
||||
Atomic<G1HeapRegion*> _alloc_region;
|
||||
|
||||
// It keeps track of the distinct number of regions that are used
|
||||
// for allocation in the active interval of this object, i.e.,
|
||||
@ -71,7 +70,7 @@ private:
|
||||
// == end()). When we don't have a valid active region we make
|
||||
// _alloc_region point to this. This allows us to skip checking
|
||||
// whether the _alloc_region is null or not.
|
||||
static G1HeapRegion* _dummy_region;
|
||||
static Atomic<G1HeapRegion*> _dummy_region;
|
||||
|
||||
// After a region is allocated by alloc_new_region, this
|
||||
// method is used to set it as the active alloc_region
|
||||
@ -124,9 +123,9 @@ public:
|
||||
static void setup(G1CollectedHeap* g1h, G1HeapRegion* dummy_region);
|
||||
|
||||
G1HeapRegion* get() const {
|
||||
G1HeapRegion * hr = _alloc_region;
|
||||
G1HeapRegion * hr = _alloc_region.load_acquire();
|
||||
// Make sure that the dummy region does not escape this class.
|
||||
return (hr == _dummy_region) ? nullptr : hr;
|
||||
return (hr == _dummy_region.load_relaxed()) ? nullptr : hr;
|
||||
}
|
||||
|
||||
uint count() { return _count; }
|
||||
@ -177,7 +176,7 @@ private:
|
||||
// Retained allocation region. Used to lower the waste generated
|
||||
// during mutation by having two active regions if the free space
|
||||
// in a region about to be retired still could fit a TLAB.
|
||||
G1HeapRegion* volatile _retained_alloc_region;
|
||||
Atomic<G1HeapRegion*> _retained_alloc_region;
|
||||
|
||||
// Decide if the region should be retained, based on the free size
|
||||
// in it and the free size in the currently retained region, if any.
|
||||
|
||||
@ -32,13 +32,13 @@
|
||||
#define assert_alloc_region(p, message) \
|
||||
do { \
|
||||
assert((p), "[%s] %s c: %u r: " PTR_FORMAT, \
|
||||
_name, (message), _count, p2i(_alloc_region) \
|
||||
_name, (message), _count, p2i(_alloc_region.load_relaxed()) \
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
|
||||
inline void G1AllocRegion::reset_alloc_region() {
|
||||
_alloc_region = _dummy_region;
|
||||
_alloc_region.store_relaxed(_dummy_region.load_relaxed());
|
||||
}
|
||||
|
||||
inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t word_size) {
|
||||
@ -51,7 +51,7 @@ inline HeapWord* G1AllocRegion::par_allocate(G1HeapRegion* alloc_region, size_t
|
||||
inline HeapWord* G1AllocRegion::attempt_allocation(size_t min_word_size,
|
||||
size_t desired_word_size,
|
||||
size_t* actual_word_size) {
|
||||
G1HeapRegion* alloc_region = _alloc_region;
|
||||
G1HeapRegion* alloc_region = _alloc_region.load_acquire();
|
||||
assert_alloc_region(alloc_region != nullptr && !alloc_region->is_empty(), "not initialized properly");
|
||||
|
||||
HeapWord* result = alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
|
||||
@ -97,8 +97,9 @@ inline HeapWord* G1AllocRegion::attempt_allocation_using_new_region(size_t min_w
|
||||
inline HeapWord* MutatorAllocRegion::attempt_retained_allocation(size_t min_word_size,
|
||||
size_t desired_word_size,
|
||||
size_t* actual_word_size) {
|
||||
if (_retained_alloc_region != nullptr) {
|
||||
HeapWord* result = _retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
|
||||
G1HeapRegion* retained_alloc_region = _retained_alloc_region.load_acquire();
|
||||
if (retained_alloc_region != nullptr) {
|
||||
HeapWord* result = retained_alloc_region->par_allocate(min_word_size, desired_word_size, actual_word_size);
|
||||
if (result != nullptr) {
|
||||
trace("alloc retained", min_word_size, desired_word_size, *actual_word_size, result);
|
||||
return result;
|
||||
|
||||
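
Throughout G1AllocRegion the raw `G1HeapRegion* volatile` fields become Atomic<G1HeapRegion*>: writers publish a fully-initialized region with release_store(), readers pair that with load_acquire(), and code already ordered by other means uses the _relaxed accessors, which also retires the explicit OrderAccess::storestore(). The publication pattern in isolation, with std::atomic standing in for the JDK wrapper:

    #include <atomic>

    struct Region { int free_words; };

    std::atomic<Region*> g_alloc_region{nullptr};

    // Writer: initialize first, then release-store the pointer. A reader
    // that acquire-loads a non-null pointer is guaranteed to see the
    // initialized fields -- the "active region is never empty" invariant
    // in the patched code.
    void publish(Region* r) {
      r->free_words = 1024;
      g_alloc_region.store(r, std::memory_order_release);
    }

    int read_free_words() {
      Region* r = g_alloc_region.load(std::memory_order_acquire);
      return r != nullptr ? r->free_words : -1;
    }
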
@ -77,10 +77,11 @@ void G1Arguments::initialize_alignments() {
}

size_t G1Arguments::conservative_max_heap_alignment() {
if (FLAG_IS_DEFAULT(G1HeapRegionSize)) {
return G1HeapRegion::max_ergonomics_size();
}
return G1HeapRegion::max_region_size();
const size_t region_size = FLAG_IS_DEFAULT(G1HeapRegionSize)
? G1HeapRegion::max_ergonomics_size()
: G1HeapRegion::max_region_size();

return calculate_heap_alignment(region_size);
}

void G1Arguments::initialize_verification_types() {

@ -2355,7 +2355,8 @@ static void print_region_type(outputStream* st, const char* type, uint count, bo
}

void G1CollectedHeap::print_heap_on(outputStream* st) const {
size_t heap_used = Heap_lock->owned_by_self() ? used() : used_unlocked();
size_t heap_used = (Thread::current_or_null_safe() != nullptr &&
Heap_lock->owned_by_self()) ? used() : used_unlocked();
st->print("%-20s", "garbage-first heap");
st->print(" total reserved %zuK, committed %zuK, used %zuK",
_hrm.reserved().byte_size()/K, capacity()/K, heap_used/K);

@ -611,23 +611,24 @@ void G1RemSet::scan_collection_set_code_roots(G1ParScanThreadState* pss,
G1GCPhaseTimes::GCParPhases coderoots_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase) {
EventGCPhaseParallel event;

Tickspan code_root_scan_time;
Tickspan code_root_trim_partially_time;
G1EvacPhaseWithTrimTimeTracker timer(pss, code_root_scan_time, code_root_trim_partially_time);

G1GCPhaseTimes* p = _g1h->phase_times();
{
G1EvacPhaseWithTrimTimeTracker timer(pss, code_root_scan_time, code_root_trim_partially_time);

G1ScanCodeRootsClosure cl(_scan_state, pss, worker_id);
// Code roots work distribution occurs inside the iteration method. So scan all collection
// set regions for all threads.
_g1h->collection_set_iterate_increment_from(&cl, worker_id);
G1ScanCodeRootsClosure cl(_scan_state, pss, worker_id);
// Code roots work distribution occurs inside the iteration method. So scan all collection
// set regions for all threads.
_g1h->collection_set_iterate_increment_from(&cl, worker_id);

p->record_or_add_thread_work_item(coderoots_phase, worker_id, cl.code_roots_scanned(), G1GCPhaseTimes::CodeRootsScannedNMethods);
}

p->record_or_add_time_secs(coderoots_phase, worker_id, code_root_scan_time.seconds());
p->add_time_secs(objcopy_phase, worker_id, code_root_trim_partially_time.seconds());

p->record_or_add_thread_work_item(coderoots_phase, worker_id, cl.code_roots_scanned(), G1GCPhaseTimes::CodeRootsScannedNMethods);

event.commit(GCId::current(), worker_id, G1GCPhaseTimes::phase_name(coderoots_phase));
}
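
The G1RemSet change scopes the RAII tracker so its destructor runs before the Tickspans are reported; previously the times were read while the tracker was still live. The general shape of destructor-publishes-timing, sketched with std::chrono (hypothetical names; the JDK type is G1EvacPhaseWithTrimTimeTracker):

    #include <chrono>
    #include <cstdio>

    // RAII timer whose destructor writes the elapsed seconds into a
    // caller-owned variable; the value is only final once the scope ends.
    class ScopedTimer {
      std::chrono::steady_clock::time_point _start;
      double& _elapsed_sec;
     public:
      explicit ScopedTimer(double& out)
          : _start(std::chrono::steady_clock::now()), _elapsed_sec(out) {}
      ~ScopedTimer() {
        std::chrono::duration<double> d =
            std::chrono::steady_clock::now() - _start;
        _elapsed_sec = d.count();
      }
    };

    void do_phase() {
      double scan_sec = 0.0;
      {
        ScopedTimer t(scan_sec);
        // ... the work being measured ...
      }                 // destructor fires here; scan_sec is now final
      std::printf("scan took %.6f s\n", scan_sec);
    }
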
@ -37,8 +37,45 @@
#include "utilities/defaultStream.hpp"
#include "utilities/powerOfTwo.hpp"

size_t ParallelArguments::conservative_max_heap_alignment() {
return compute_heap_alignment();
static size_t num_young_spaces() {
// When using NUMA, we create one MutableNUMASpace for each NUMA node
const size_t num_eden_spaces = UseNUMA ? os::numa_get_groups_num() : 1;

// The young generation must have room for eden + two survivors
return num_eden_spaces + 2;
}

static size_t num_old_spaces() {
return 1;
}

void ParallelArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
const size_t card_table_alignment = CardTable::ct_max_alignment_constraint();
SpaceAlignment = ParallelScavengeHeap::default_space_alignment();

if (UseLargePages) {
const size_t total_spaces = num_young_spaces() + num_old_spaces();
const size_t page_size = os::page_size_for_region_unaligned(MaxHeapSize, total_spaces);
ParallelScavengeHeap::set_desired_page_size(page_size);

if (page_size == os::vm_page_size()) {
log_warning(gc, heap)("MaxHeapSize (%zu) must be large enough for %zu * page-size; Disabling UseLargePages for heap",
MaxHeapSize, total_spaces);
}

if (page_size > SpaceAlignment) {
SpaceAlignment = page_size;
}

HeapAlignment = lcm(page_size, card_table_alignment);

} else {
assert(is_aligned(SpaceAlignment, os::vm_page_size()), "");
ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
HeapAlignment = card_table_alignment;
}
}

void ParallelArguments::initialize() {
@ -98,49 +135,36 @@ void ParallelArguments::initialize() {
FullGCForwarding::initialize_flags(heap_reserved_size_bytes());
}

void ParallelArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = ParallelScavengeHeap::default_space_alignment();
HeapAlignment = compute_heap_alignment();
}
size_t ParallelArguments::conservative_max_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
// byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.

void ParallelArguments::initialize_heap_flags_and_sizes_one_pass() {
// Do basic sizing work
GenArguments::initialize_heap_flags_and_sizes();
}
size_t alignment = CardTable::ct_max_alignment_constraint();

void ParallelArguments::initialize_heap_flags_and_sizes() {
initialize_heap_flags_and_sizes_one_pass();

if (!UseLargePages) {
ParallelScavengeHeap::set_desired_page_size(os::vm_page_size());
return;
if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}

// If using large-page, need to update SpaceAlignment so that spaces are page-size aligned.
const size_t min_pages = 4; // 1 for eden + 1 for each survivor + 1 for old
const size_t page_sz = os::page_size_for_region_aligned(MinHeapSize, min_pages);
ParallelScavengeHeap::set_desired_page_size(page_sz);

if (page_sz == os::vm_page_size()) {
log_warning(gc, heap)("MinHeapSize (%zu) must be large enough for 4 * page-size; Disabling UseLargePages for heap", MinHeapSize);
return;
}

// Space is largepage-aligned.
size_t new_alignment = page_sz;
if (new_alignment != SpaceAlignment) {
SpaceAlignment = new_alignment;
// Redo everything from the start
initialize_heap_flags_and_sizes_one_pass();
}
}

size_t ParallelArguments::heap_reserved_size_bytes() {
return MaxHeapSize;
return alignment;
}

CollectedHeap* ParallelArguments::create_heap() {
return new ParallelScavengeHeap();
}

size_t ParallelArguments::young_gen_size_lower_bound() {
return num_young_spaces() * SpaceAlignment;
}

size_t ParallelArguments::old_gen_size_lower_bound() {
return num_old_spaces() * SpaceAlignment;
}

size_t ParallelArguments::heap_reserved_size_bytes() {
return MaxHeapSize;
}
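
The new initialize_alignments() picks a page size the heap can actually fill (one eden per NUMA node, two survivors, one old space) and then derives HeapAlignment as the least common multiple of that page size and the card-table constraint. The arithmetic in isolation (inputs as plain parameters; in the real code they come from os:: and CardTable:: queries):

    #include <cstddef>
    #include <numeric>

    // The heap must honor both the page granularity and the card-table
    // granularity, so the alignment is their least common multiple.
    size_t heap_alignment(size_t page_size, size_t card_table_constraint) {
      return std::lcm(page_size, card_table_constraint);
    }

    // E.g. 2 MiB large pages with a 4 MiB card-table constraint give a
    // 4 MiB heap alignment.
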
@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,21 +26,16 @@
#ifndef SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP
#define SHARE_GC_PARALLEL_PARALLELARGUMENTS_HPP

#include "gc/shared/gcArguments.hpp"
#include "gc/shared/genArguments.hpp"

class CollectedHeap;

class ParallelArguments : public GenArguments {
private:
virtual void initialize_alignments();
virtual void initialize_heap_flags_and_sizes();

void initialize_heap_flags_and_sizes_one_pass();

virtual void initialize();
virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
virtual size_t young_gen_size_lower_bound();
virtual size_t old_gen_size_lower_bound();

public:
static size_t heap_reserved_size_bytes();

@ -307,9 +307,13 @@ HeapWord* ParallelScavengeHeap::mem_allocate_cas_noexpand(size_t size, bool is_t

HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
for (uint loop_count = 0; /* empty */; ++loop_count) {
HeapWord* result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
return result;
HeapWord* result;
{
ConditionalMutexLocker locker(Heap_lock, !is_init_completed());
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
return result;
}
}

// Read total_collections() under the lock so that multiple
@ -326,10 +330,15 @@ HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
}

if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
// Double-checked locking: this ensures that is_init_completed() does not
// transition while expanding the heap.
MonitorLocker ml(InitCompleted_lock, Monitor::_no_safepoint_check_flag);
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
}
}
}
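
Both collectors now use the same double-checked pattern around the pre-initialization expansion path: a cheap is_init_completed() test first, then a re-test under InitCompleted_lock so initialization cannot complete concurrently with the expansion. The skeleton of the pattern (std::mutex/std::atomic stand-ins for the JDK monitor):

    #include <atomic>
    #include <mutex>

    std::atomic<bool> init_completed{false};
    std::mutex init_completed_lock;

    // Double-checked locking: the unlocked first check skips the lock in
    // the common (initialized) case; the locked second check makes the
    // decision race-free against the initializing thread.
    bool try_pre_init_action() {
      if (!init_completed.load(std::memory_order_acquire)) {
        std::lock_guard<std::mutex> guard(init_completed_lock);
        if (!init_completed.load(std::memory_order_relaxed)) {
          // ... safe: initialization cannot complete while we hold the lock
          return true;
        }
      }
      return false;
    }
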
@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,11 +28,49 @@
#include "gc/shared/fullGCForwarding.hpp"
#include "gc/shared/gcArguments.hpp"

static size_t compute_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
// byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.

size_t alignment = CardTable::ct_max_alignment_constraint();

if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}

return alignment;
}

void SerialArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = (size_t)Generation::GenGrain;
HeapAlignment = compute_heap_alignment();
}

void SerialArguments::initialize() {
GCArguments::initialize();
FullGCForwarding::initialize_flags(MaxHeapSize);
}

size_t SerialArguments::conservative_max_heap_alignment() {
return MAX2((size_t)Generation::GenGrain, compute_heap_alignment());
}

CollectedHeap* SerialArguments::create_heap() {
return new SerialHeap();
}

size_t SerialArguments::young_gen_size_lower_bound() {
// The young generation must be aligned and have room for eden + two survivors
return 3 * SpaceAlignment;
}

size_t SerialArguments::old_gen_size_lower_bound() {
return SpaceAlignment;
}

@ -1,5 +1,6 @@
/*
* Copyright (c) 2017, Red Hat, Inc. and/or its affiliates.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,12 +28,14 @@

#include "gc/shared/genArguments.hpp"

class CollectedHeap;

class SerialArguments : public GenArguments {
private:
virtual void initialize_alignments();
virtual void initialize();
virtual size_t conservative_max_heap_alignment();
virtual CollectedHeap* create_heap();
virtual size_t young_gen_size_lower_bound();
virtual size_t old_gen_size_lower_bound();
};

#endif // SHARE_GC_SERIAL_SERIALARGUMENTS_HPP

@ -304,9 +304,12 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
HeapWord* result = nullptr;

for (uint try_count = 1; /* break */; try_count++) {
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
break;
{
ConditionalMutexLocker locker(Heap_lock, !is_init_completed());
result = mem_allocate_cas_noexpand(size, is_tlab);
if (result != nullptr) {
break;
}
}
uint gc_count_before; // Read inside the Heap_lock locked region.
{
@ -320,10 +323,15 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
}

if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
// Double-checked locking: this ensures that is_init_completed() does not
// transition while expanding the heap.
MonitorLocker ml(InitCompleted_lock, Monitor::_no_safepoint_check_flag);
if (!is_init_completed()) {
// Can't do GC; try heap expansion to satisfy the request.
result = expand_heap_and_allocate(size, is_tlab);
if (result != nullptr) {
return result;
}
}
}

@ -27,6 +27,7 @@

#include "cppstdlib/limits.hpp"
#include "gc/shared/freeListAllocator.hpp"
#include "runtime/atomic.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/lockFreeStack.hpp"
@ -38,7 +39,7 @@ class BufferNode {

InternalSizeType _index;
InternalSizeType _capacity;
BufferNode* volatile _next;
Atomic<BufferNode*> _next;
void* _buffer[1]; // Pseudo flexible array member.

BufferNode(InternalSizeType capacity)
@ -58,11 +59,11 @@ public:
return std::numeric_limits<InternalSizeType>::max();
}

static BufferNode* volatile* next_ptr(BufferNode& bn) { return &bn._next; }
static Atomic<BufferNode*>* next_ptr(BufferNode& bn) { return &bn._next; }
typedef LockFreeStack<BufferNode, &next_ptr> Stack;

BufferNode* next() const { return _next; }
void set_next(BufferNode* n) { _next = n; }
BufferNode* next() const { return _next.load_relaxed(); }
void set_next(BufferNode* n) { _next.store_relaxed(n); }
size_t index() const { return _index; }

void set_index(size_t i) {
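
BufferNode's link becomes Atomic<BufferNode*> because next_ptr() hands it straight to the LockFreeStack, which CASes the top while other threads may still be reading links. A miniature Treiber-stack push over such a field shows why (std::atomic standing in for the JDK Atomic<>):

    #include <atomic>

    struct Node {
      std::atomic<Node*> _next{nullptr};
    };

    std::atomic<Node*> top{nullptr};

    // Treiber push: link the node to the current top, then try to swing
    // top to the node; on CAS failure old_top is refreshed and we relink.
    void push(Node* n) {
      Node* old_top = top.load(std::memory_order_relaxed);
      do {
        n->_next.store(old_top, std::memory_order_relaxed);
      } while (!top.compare_exchange_weak(old_top, n,
                                          std::memory_order_release,
                                          std::memory_order_relaxed));
    }
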
@ -62,24 +62,6 @@ void GCArguments::initialize_heap_sizes() {
initialize_size_info();
}

size_t GCArguments::compute_heap_alignment() {
// The card marking array and the offset arrays for old generations are
// committed in os pages as well. Make sure they are entirely full (to
// avoid partial page problems), e.g. if 512 bytes heap corresponds to 1
// byte entry and the os page size is 4096, the maximum heap size should
// be 512*4096 = 2MB aligned.

size_t alignment = CardTable::ct_max_alignment_constraint();

if (UseLargePages) {
// In presence of large pages we have to make sure that our
// alignment is large page aware.
alignment = lcm(os::large_page_size(), alignment);
}

return alignment;
}

#ifdef ASSERT
void GCArguments::assert_flags() {
assert(InitialHeapSize <= MaxHeapSize, "Ergonomics decided on incompatible initial and maximum heap sizes");

@ -45,6 +45,8 @@ protected:

public:
virtual void initialize();

// Return the (conservative) maximum heap alignment
virtual size_t conservative_max_heap_alignment() = 0;

// Used by heap size heuristics to determine max
@ -59,8 +61,6 @@ public:
}

void initialize_heap_sizes();

static size_t compute_heap_alignment();
};

#endif // SHARE_GC_SHARED_GCARGUMENTS_HPP

@ -25,6 +25,7 @@
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/thread.hpp"
#include "utilities/ostream.hpp"

stringStream* GCLogPrecious::_lines = nullptr;
@ -83,7 +84,8 @@ void GCLogPrecious::print_on_error(outputStream* st) {
return;
}

if (!_lock->try_lock_without_rank_check()) {
if (Thread::current_or_null_safe() == nullptr ||
!_lock->try_lock_without_rank_check()) {
st->print_cr("<Skipped>\n");
return;
}

@ -291,7 +291,7 @@
"size on systems with small physical memory size") \
range(0.0, 100.0) \
\
product(double, InitialRAMPercentage, 0.2, \
product(double, InitialRAMPercentage, 0.0, \
"Percentage of real memory used for initial heap size") \
range(0.0, 100.0) \
\

@ -42,17 +42,6 @@ size_t MaxOldSize = 0;
// See more in JDK-8346005
size_t OldSize = ScaleForWordSize(4*M);

size_t GenArguments::conservative_max_heap_alignment() { return (size_t)Generation::GenGrain; }

static size_t young_gen_size_lower_bound() {
// The young generation must be aligned and have room for eden + two survivors
return 3 * SpaceAlignment;
}

static size_t old_gen_size_lower_bound() {
return SpaceAlignment;
}

size_t GenArguments::scale_by_NewRatio_aligned(size_t base_size, size_t alignment) {
return align_down_bounded(base_size / (NewRatio + 1), alignment);
}
@ -64,13 +53,6 @@ static size_t bound_minus_alignment(size_t desired_size,
return MIN2(desired_size, max_minus);
}

void GenArguments::initialize_alignments() {
// Initialize card size before initializing alignments
CardTable::initialize_card_size();
SpaceAlignment = (size_t)Generation::GenGrain;
HeapAlignment = compute_heap_alignment();
}

void GenArguments::initialize_heap_flags_and_sizes() {
GCArguments::initialize_heap_flags_and_sizes();

@ -38,17 +38,16 @@ extern size_t OldSize;
class GenArguments : public GCArguments {
friend class TestGenCollectorPolicy; // Testing
private:
virtual void initialize_alignments();
virtual void initialize_size_info();

// Return the (conservative) maximum heap alignment
virtual size_t conservative_max_heap_alignment();

DEBUG_ONLY(void assert_flags();)
DEBUG_ONLY(void assert_size_info();)

static size_t scale_by_NewRatio_aligned(size_t base_size, size_t alignment);

virtual size_t young_gen_size_lower_bound() = 0;
virtual size_t old_gen_size_lower_bound() = 0;

protected:
virtual void initialize_heap_flags_and_sizes();
};

@ -250,7 +250,7 @@ static JVMFlag::Error MaxSizeForHeapAlignment(const char* name, size_t value, bo
} else
#endif
{
heap_alignment = GCArguments::compute_heap_alignment();
heap_alignment = Arguments::conservative_max_heap_alignment();
}

return MaxSizeForAlignment(name, value, heap_alignment, verbose);
@ -285,7 +285,7 @@ JVMFlag::Error SoftMaxHeapSizeConstraintFunc(size_t value, bool verbose) {
JVMFlag::Error HeapBaseMinAddressConstraintFunc(size_t value, bool verbose) {
// If an overflow happened in Arguments::set_heap_size(), MaxHeapSize will have too large a value.
// Check for this by ensuring that MaxHeapSize plus the requested min base address still fit within max_uintx.
if (UseCompressedOops && FLAG_IS_ERGO(MaxHeapSize) && (value > (max_uintx - MaxHeapSize))) {
if (value > (max_uintx - MaxHeapSize)) {
JVMFlag::printError(verbose,
"HeapBaseMinAddress (%zu) or MaxHeapSize (%zu) is too large. "
"Sum of them must be less than or equal to maximum of size_t (%zu)\n",
@ -27,7 +27,6 @@
|
||||
#include "logging/log.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
@ -85,28 +84,28 @@ SATBMarkQueueSet::~SATBMarkQueueSet() {
|
||||
// remains set until the count is reduced to zero.
|
||||
|
||||
// Increment count. If count > threshold, set flag, else maintain flag.
|
||||
static void increment_count(volatile size_t* cfptr, size_t threshold) {
|
||||
static void increment_count(Atomic<size_t>* cfptr, size_t threshold) {
|
||||
size_t old;
|
||||
size_t value = AtomicAccess::load(cfptr);
|
||||
size_t value = cfptr->load_relaxed();
|
||||
do {
|
||||
old = value;
|
||||
value += 2;
|
||||
assert(value > old, "overflow");
|
||||
if (value > threshold) value |= 1;
|
||||
value = AtomicAccess::cmpxchg(cfptr, old, value);
|
||||
value = cfptr->compare_exchange(old, value);
|
||||
} while (value != old);
|
||||
}
|
||||
|
||||
// Decrement count. If count == 0, clear flag, else maintain flag.
|
||||
static void decrement_count(volatile size_t* cfptr) {
|
||||
static void decrement_count(Atomic<size_t>* cfptr) {
|
||||
size_t old;
|
||||
size_t value = AtomicAccess::load(cfptr);
|
||||
size_t value = cfptr->load_relaxed();
|
||||
do {
|
||||
assert((value >> 1) != 0, "underflow");
|
||||
old = value;
|
||||
value -= 2;
|
||||
if (value <= 1) value = 0;
|
||||
value = AtomicAccess::cmpxchg(cfptr, old, value);
|
||||
value = cfptr->compare_exchange(old, value);
|
||||
} while (value != old);
|
||||
}
|
||||
|
||||
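The two helpers above pack the buffer count and the process flag into a single word: the count is kept shifted left by one (hence the += 2 / -= 2), and bit 0 holds the flag. A simplified single-threaded sketch of the same encoding; the real code loops on compare_exchange for concurrent updaters:

#include <cassert>
#include <cstddef>
#include <cstdio>

// count is stored shifted left by one; bit 0 is the "process" flag.
static size_t increment(size_t v, size_t threshold) {
  v += 2;                    // count + 1
  if (v > threshold) v |= 1; // set flag once the threshold is crossed
  return v;
}

static size_t decrement(size_t v) {
  assert((v >> 1) != 0);     // count must not underflow
  v -= 2;                    // count - 1
  if (v <= 1) v = 0;         // count reached zero: clear the flag too
  return v;
}

int main() {
  size_t threshold = 2 << 1; // flag set once the count exceeds 2
  size_t v = 0;
  for (int i = 0; i < 4; i++) v = increment(v, threshold);
  printf("count=%zu flag=%zu\n", v >> 1, v & 1); // count=4 flag=1
  while (v != 0) v = decrement(v);
  printf("count=%zu flag=%zu\n", v >> 1, v & 1); // count=0 flag=0
  return 0;
}
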
@ -332,7 +331,7 @@ void SATBMarkQueueSet::print_all(const char* msg) {
#endif // PRODUCT

void SATBMarkQueueSet::abandon_completed_buffers() {
  AtomicAccess::store(&_count_and_process_flag, size_t(0));
  _count_and_process_flag.store_relaxed(0u);
  BufferNode* buffers_to_delete = _list.pop_all();
  while (buffers_to_delete != nullptr) {
    BufferNode* bn = buffers_to_delete;

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -29,6 +29,7 @@
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "oops/oopsHierarchy.hpp"
#include "runtime/atomic.hpp"

class Thread;
class Monitor;
@ -87,7 +88,7 @@ class SATBMarkQueueSet: public PtrQueueSet {

  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
  PaddedEnd<BufferNode::Stack> _list;
  volatile size_t _count_and_process_flag;
  Atomic<size_t> _count_and_process_flag;
  // These are rarely (if ever) changed, so same cache line as count.
  size_t _process_completed_buffers_threshold;
  size_t _buffer_enqueue_threshold;
@ -148,12 +149,12 @@ public:
  // The number of buffers in the list. Racy and not updated atomically
  // with the set of completed buffers.
  size_t completed_buffers_num() const {
    return _count_and_process_flag >> 1;
    return _count_and_process_flag.load_relaxed() >> 1;
  }

  // Return true if completed buffers should be processed.
  bool process_completed_buffers() const {
    return (_count_and_process_flag & 1) != 0;
    return (_count_and_process_flag.load_relaxed() & 1) != 0;
  }

#ifndef PRODUCT

@ -37,6 +37,7 @@
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/powerOfTwo.hpp"

void ShenandoahArguments::initialize() {
#if !(defined AARCH64 || defined AMD64 || defined PPC64 || defined RISCV64)
@ -205,7 +206,7 @@ void ShenandoahArguments::initialize() {
}

size_t ShenandoahArguments::conservative_max_heap_alignment() {
  size_t align = ShenandoahMaxRegionSize;
  size_t align = next_power_of_2(ShenandoahMaxRegionSize);
  if (UseLargePages) {
    align = MAX2(align, os::large_page_size());
  }

@ -208,13 +208,13 @@ oop ShenandoahGenerationalHeap::evacuate_object(oop p, Thread* thread) {

  assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope");

  ShenandoahHeapRegion* r = heap_region_containing(p);
  assert(!r->is_humongous(), "never evacuate humongous objects");
  ShenandoahHeapRegion* from_region = heap_region_containing(p);
  assert(!from_region->is_humongous(), "never evacuate humongous objects");

  ShenandoahAffiliation target_gen = r->affiliation();
  // gc_generation() can change asynchronously and should not be used here.
  assert(active_generation() != nullptr, "Error");
  if (active_generation()->is_young() && target_gen == YOUNG_GENERATION) {
    // Try to keep the object in the same generation
  const ShenandoahAffiliation target_gen = from_region->affiliation();

  if (target_gen == YOUNG_GENERATION) {
    markWord mark = p->mark();
    if (mark.is_marked()) {
      // Already forwarded.
@ -224,26 +224,31 @@ oop ShenandoahGenerationalHeap::evacuate_object(oop p, Thread* thread) {
    if (mark.has_displaced_mark_helper()) {
      // We don't want to deal with MT here just to ensure we read the right mark word.
      // Skip the potential promotion attempt for this one.
    } else if (age_census()->is_tenurable(r->age() + mark.age())) {
      oop result = try_evacuate_object(p, thread, r, OLD_GENERATION);
    } else if (age_census()->is_tenurable(from_region->age() + mark.age())) {
      // If the object is tenurable, try to promote it
      oop result = try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(p, thread, from_region->age());

      // If we failed to promote this aged object, we'll fall through to code below and evacuate to young-gen.
      if (result != nullptr) {
        return result;
      }
      // If we failed to promote this aged object, we'll fall through to code below and evacuate to young-gen.
    }
    return try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(p, thread, from_region->age());
  }
  return try_evacuate_object(p, thread, r, target_gen);

  assert(target_gen == OLD_GENERATION, "Expected evacuation to old");
  return try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(p, thread, from_region->age());
}

// try_evacuate_object registers the object and dirties the associated remembered set information when evacuating
// to OLD_GENERATION.
oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region,
                                                    ShenandoahAffiliation target_gen) {
template<ShenandoahAffiliation FROM_GENERATION, ShenandoahAffiliation TO_GENERATION>
oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, uint from_region_age) {
  bool alloc_from_lab = true;
  bool has_plab = false;
  HeapWord* copy = nullptr;
  size_t size = ShenandoahForwarding::size(p);
  bool is_promotion = (target_gen == OLD_GENERATION) && from_region->is_young();
  constexpr bool is_promotion = (TO_GENERATION == OLD_GENERATION) && (FROM_GENERATION == YOUNG_GENERATION);

#ifdef ASSERT
  if (ShenandoahOOMDuringEvacALot &&
@ -252,7 +257,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
  } else {
#endif
    if (UseTLAB) {
      switch (target_gen) {
      switch (TO_GENERATION) {
        case YOUNG_GENERATION: {
          copy = allocate_from_gclab(thread, size);
          if ((copy == nullptr) && (size < ShenandoahThreadLocalData::gclab_size(thread))) {
@ -300,7 +305,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
    if (copy == nullptr) {
      // If we failed to allocate in LAB, we'll try a shared allocation.
      if (!is_promotion || !has_plab || (size > PLAB::min_size())) {
        ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, target_gen, is_promotion);
        ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, TO_GENERATION, is_promotion);
        copy = allocate_memory(req);
        alloc_from_lab = false;
      }
@ -314,8 +319,8 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
#endif

  if (copy == nullptr) {
    if (target_gen == OLD_GENERATION) {
      if (from_region->is_young()) {
    if (TO_GENERATION == OLD_GENERATION) {
      if (FROM_GENERATION == YOUNG_GENERATION) {
        // Signal that promotion failed. Will evacuate this old object somewhere in young gen.
        old_generation()->handle_failed_promotion(thread, size);
        return nullptr;
@ -327,14 +332,12 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
    }

    control_thread()->handle_alloc_failure_evac(size);

    oom_evac_handler()->handle_out_of_memory_during_evacuation();

    return ShenandoahBarrierSet::resolve_forwarded(p);
  }

  if (ShenandoahEvacTracking) {
    evac_tracker()->begin_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
    evac_tracker()->begin_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
  }

  // Copy the object:
@ -342,8 +345,8 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
  oop copy_val = cast_to_oop(copy);

  // Update the age of the evacuated object
  if (target_gen == YOUNG_GENERATION && is_aging_cycle()) {
    ShenandoahHeap::increase_object_age(copy_val, from_region->age() + 1);
  if (TO_GENERATION == YOUNG_GENERATION && is_aging_cycle()) {
    increase_object_age(copy_val, from_region_age + 1);
  }

  // Try to install the new forwarding pointer.
@ -360,18 +363,12 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena

    if (ShenandoahEvacTracking) {
      // Record that the evacuation succeeded
      evac_tracker()->end_evacuation(thread, size * HeapWordSize, from_region->affiliation(), target_gen);
      evac_tracker()->end_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
    }

    if (target_gen == OLD_GENERATION) {
      old_generation()->handle_evacuation(copy, size, from_region->is_young());
    } else {
      // When copying to the old generation above, we don't care
      // about recording object age in the census stats.
      assert(target_gen == YOUNG_GENERATION, "Error");
    if (TO_GENERATION == OLD_GENERATION) {
      old_generation()->handle_evacuation(copy, size);
    }
    shenandoah_assert_correct(nullptr, copy_val);
    return copy_val;
  } else {
    // Failed to evacuate. We need to deal with the object that is left behind. Since this
    // new allocation is certainly after TAMS, it will be considered live in the next cycle.
@ -382,7 +379,7 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
    // For LAB allocations, it is enough to rollback the allocation ptr. Either the next
    // object will overwrite this stale copy, or the filler object on LAB retirement will
    // do this.
    switch (target_gen) {
    switch (TO_GENERATION) {
      case YOUNG_GENERATION: {
        ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
        break;
@ -405,14 +402,16 @@ oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, Shena
      // we have to keep the fwdptr initialized and pointing to our (stale) copy.
      assert(size >= ShenandoahHeap::min_fill_size(), "previously allocated object known to be larger than min_size");
      fill_with_object(copy, size);
      shenandoah_assert_correct(nullptr, copy_val);
      // For non-LAB allocations, the object has already been registered
    }
    shenandoah_assert_correct(nullptr, result);
    return result;
  }
  shenandoah_assert_correct(nullptr, result);
  return result;
}

template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(oop p, Thread* thread, uint from_region_age);
template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);
template oop ShenandoahGenerationalHeap::try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);

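The rewrite turns the from/to generations into template parameters, so is_promotion becomes a constexpr and each explicit instantiation compiles only its own path. A minimal sketch of the pattern with illustrative names (not the Shenandoah types):

#include <cstdio>

enum Affiliation { YOUNG_GENERATION, OLD_GENERATION };

template <Affiliation FROM, Affiliation TO>
void evacuate() {
  // Known at compile time; `if constexpr` discards the untaken branch
  // in each instantiation.
  constexpr bool is_promotion = (TO == OLD_GENERATION) && (FROM == YOUNG_GENERATION);
  if constexpr (is_promotion) {
    printf("promote young -> old\n");
  } else {
    printf("copy within the same generation\n");
  }
}

// Explicit instantiations mirror the three supported evacuation paths.
template void evacuate<YOUNG_GENERATION, YOUNG_GENERATION>();
template void evacuate<YOUNG_GENERATION, OLD_GENERATION>();
template void evacuate<OLD_GENERATION, OLD_GENERATION>();

int main() {
  evacuate<YOUNG_GENERATION, OLD_GENERATION>(); // prints: promote young -> old
  return 0;
}
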
inline HeapWord* ShenandoahGenerationalHeap::allocate_from_plab(Thread* thread, size_t size, bool is_promotion) {
  assert(UseTLAB, "TLABs should be enabled");


@ -87,7 +87,9 @@ public:
  void update_region_ages(ShenandoahMarkingContext* ctx);

  oop evacuate_object(oop p, Thread* thread) override;
  oop try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region, ShenandoahAffiliation target_gen);

  template<ShenandoahAffiliation FROM_REGION, ShenandoahAffiliation TO_REGION>
  oop try_evacuate_object(oop p, Thread* thread, uint from_region_age);

  // In the generational mode, we will use these two functions for young, mixed, and global collections.
  // For young and mixed, the generation argument will be the young generation, otherwise it will be the global generation.

@ -34,4 +34,5 @@ inline bool ShenandoahGenerationalHeap::is_tenurable(const ShenandoahHeapRegion*
  return _age_census->is_tenurable(r->age());
}


#endif // SHARE_GC_SHENANDOAH_SHENANDOAHGENERATIONALHEAP_INLINE_HPP

@ -1015,7 +1015,7 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
  // Record the plab configuration for this result and register the object.
  if (result != nullptr && req.is_old()) {
    old_generation()->configure_plab_for_current_thread(req);
    if (req.type() == ShenandoahAllocRequest::_alloc_shared_gc) {
    if (!req.is_lab_alloc()) {
      // Register the newly allocated object while we're holding the global lock since there's no synchronization
      // built in to the implementation of register_object(). There are potential races when multiple independent
      // threads are allocating objects, some of which might span the same card region. For example, consider

@ -619,7 +619,7 @@ void ShenandoahOldGeneration::log_failed_promotion(LogStream& ls, Thread* thread
  }
}

void ShenandoahOldGeneration::handle_evacuation(HeapWord* obj, size_t words, bool promotion) {
void ShenandoahOldGeneration::handle_evacuation(HeapWord* obj, size_t words) const {
  // Only register the copy of the object that won the evacuation race.
  _card_scan->register_object_without_lock(obj);


@ -179,7 +179,7 @@ public:
  void log_failed_promotion(LogStream& ls, Thread* thread, size_t size) const;

  // A successful evacuation re-dirties the cards and registers the object with the remembered set
  void handle_evacuation(HeapWord* obj, size_t words, bool promotion);
  void handle_evacuation(HeapWord* obj, size_t words) const;

  // Clear the flag after it is consumed by the control thread
  bool clear_failed_evacuation() {

@ -1087,7 +1087,6 @@ private:
  ZRelocateSmallAllocator _small_allocator;
  ZRelocateMediumAllocator _medium_allocator;
  const size_t _total_forwardings;
  volatile size_t _numa_local_forwardings;

public:
  ZRelocateTask(ZRelocationSet* relocation_set,
@ -1104,8 +1103,7 @@ public:
      _medium_targets(medium_targets),
      _small_allocator(_generation),
      _medium_allocator(_generation, shared_medium_targets),
      _total_forwardings(relocation_set->nforwardings()),
      _numa_local_forwardings(0) {
      _total_forwardings(relocation_set->nforwardings()) {

    for (uint32_t i = 0; i < ZNUMA::count(); i++) {
      ZRelocationSetParallelIterator* const iter = _iters->addr(i);
@ -1124,18 +1122,17 @@ public:

    // Signal that we're not using the queue anymore. Used mostly for asserts.
    _queue->deactivate();

    if (ZNUMA::is_enabled()) {
      log_debug(gc, reloc, numa)("Forwardings relocated NUMA-locally: %zu / %zu (%.0f%%)",
                                 _numa_local_forwardings, _total_forwardings, percent_of(_numa_local_forwardings, _total_forwardings));
    }
  }

  virtual void work() {
    ZRelocateWork<ZRelocateSmallAllocator> small(&_small_allocator, _small_targets->addr(), _generation);
    ZRelocateWork<ZRelocateMediumAllocator> medium(&_medium_allocator, _medium_targets->addr(), _generation);

    const uint32_t num_nodes = ZNUMA::count();
    uint32_t numa_local_forwardings_worker = 0;
    const uint32_t start_node = ZNUMA::id();
    uint32_t current_node = start_node;
    bool has_affinity = false;
    bool has_affinity_current_node = false;

    const auto do_forwarding = [&](ZForwarding* forwarding) {
      ZPage* const page = forwarding->page();
@ -1167,26 +1164,30 @@ public:

    const auto do_forwarding_one_from_iter = [&]() {
      ZForwarding* forwarding;
      const uint32_t start_node = ZNUMA::id();
      uint32_t current_node = start_node;

      for (uint32_t i = 0; i < num_nodes; i++) {
      for (;;) {
        if (_iters->get(current_node).next_if(&forwarding, check_numa_local, current_node)) {
          claim_and_do_forwarding(forwarding);

          if (current_node == start_node) {
            // Track if this forwarding was relocated on the local NUMA node
            numa_local_forwardings_worker++;
          // Set thread affinity for NUMA-local processing (if needed)
          if (UseNUMA && !has_affinity_current_node) {
            os::numa_set_thread_affinity(Thread::current(), ZNUMA::numa_id_to_node(current_node));
            has_affinity = true;
            has_affinity_current_node = true;
          }

          // Perform the forwarding task
          claim_and_do_forwarding(forwarding);
          return true;
        }

        // Check next node.
        // No work found on the current node, move to the next node
        current_node = (current_node + 1) % num_nodes;
      }
      has_affinity_current_node = false;

      return false;
        // If we've looped back to the starting node there's no more work to do
        if (current_node == start_node) {
          return false;
        }
      }
    };

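The rewritten lambda scans the per-NUMA-node iterators round-robin, starting at the worker's own node and giving up only after a full wrap-around finds no work. A hedged standalone sketch of that loop shape, with simple queues standing in for ZRelocationSetParallelIterator:

#include <cstdint>
#include <cstdio>
#include <deque>
#include <vector>

// One work queue per NUMA node; ints stand in for ZForwarding*.
static bool take_one(std::vector<std::deque<int>>& queues, uint32_t start_node) {
  const uint32_t num_nodes = (uint32_t)queues.size();
  uint32_t current = start_node;
  for (;;) {
    if (!queues[current].empty()) {
      int item = queues[current].front();
      queues[current].pop_front();
      printf("node %u processed item %d\n", current, item);
      return true;
    }
    // No work on this node: advance round-robin.
    current = (current + 1) % num_nodes;
    // Back at the start after a full wrap-around: no work anywhere.
    if (current == start_node) {
      return false;
    }
  }
}

int main() {
  std::vector<std::deque<int>> queues(4);
  queues[2] = {7, 8}; // only node 2 has work
  while (take_one(queues, /*start_node=*/0)) {}
  return 0;
}
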
    for (;;) {
@ -1209,11 +1210,13 @@ public:
      }
    }

    if (ZNUMA::is_enabled()) {
      AtomicAccess::add(&_numa_local_forwardings, numa_local_forwardings_worker, memory_order_relaxed);
    }

    _queue->leave();

    if (UseNUMA && has_affinity) {
      // Restore the affinity of the thread so that it isn't bound to a specific
      // node any more
      os::numa_set_thread_affinity(Thread::current(), -1);
    }
  }

  virtual void resize_workers(uint nworkers) {

@ -130,7 +130,10 @@ static void z_verify_root_oop_object(zaddress addr, void* p) {

static void z_verify_old_oop(zpointer* p) {
  const zpointer o = *p;
  assert(o != zpointer::null, "Old should not contain raw null");
  if (o == zpointer::null) {
    guarantee(ZGeneration::young()->is_phase_mark_complete(), "Only possible when flip promoting");
    guarantee(ZHeap::heap()->page(p)->is_allocating(), "Raw nulls only possible in allocating pages");
  }
  if (!z_is_null_relaxed(o)) {
    if (ZPointer::is_mark_good(o)) {
      // Even though the pointer is mark good, we can't verify that it should

@ -217,7 +217,8 @@ static bool compute_top_frame(const JfrSampleRequest& request, frame& top_frame,
  const PcDesc* const pc_desc = get_pc_desc(sampled_nm, sampled_pc);
  if (is_valid(pc_desc)) {
    intptr_t* const synthetic_sp = sender_sp - sampled_nm->frame_size();
    top_frame = frame(synthetic_sp, synthetic_sp, sender_sp - 2, pc_desc->real_pc(sampled_nm), sampled_nm);
    intptr_t* const synthetic_fp = sender_sp AARCH64_ONLY( - frame::sender_sp_offset);
    top_frame = frame(synthetic_sp, synthetic_sp, synthetic_fp, pc_desc->real_pc(sampled_nm), sampled_nm);
    in_continuation = is_in_continuation(top_frame, jt);
    return true;
  }

@ -437,7 +437,7 @@ ReservedSpace HeapReserver::Instance::try_reserve_range(char *highest_start,

  if (reserved.is_reserved()) {
    if (reserved.base() >= aligned_heap_base_min_address &&
        size <= (uintptr_t)(upper_bound - reserved.base())) {
        size <= (size_t)(upper_bound - reserved.base())) {
      // Got a successful reservation.
      return reserved;
    }
@ -546,16 +546,16 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz

  const size_t attach_point_alignment = lcm(alignment, os_attach_point_alignment);

  char* aligned_heap_base_min_address = align_up((char*)HeapBaseMinAddress, alignment);
  size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > (char*)OopEncodingHeapMax) ?
  uintptr_t aligned_heap_base_min_address = align_up(MAX2(HeapBaseMinAddress, alignment), alignment);
  size_t noaccess_prefix = ((aligned_heap_base_min_address + size) > OopEncodingHeapMax) ?
    noaccess_prefix_size : 0;

  ReservedSpace reserved{};

  // Attempt to alloc at user-given address.
  if (!FLAG_IS_DEFAULT(HeapBaseMinAddress)) {
    reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, aligned_heap_base_min_address);
    if (reserved.base() != aligned_heap_base_min_address) { // Enforce this exact address.
    reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, (char*)aligned_heap_base_min_address);
    if (reserved.base() != (char*)aligned_heap_base_min_address) { // Enforce this exact address.
      release(reserved);
      reserved = {};
    }
@ -575,38 +575,41 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz

  // Attempt to allocate so that we can run without base and scale (32-Bit unscaled compressed oops).
  // Give it several tries from top of range to bottom.
  if (aligned_heap_base_min_address + size <= (char *)UnscaledOopHeapMax) {
  if (aligned_heap_base_min_address + size <= UnscaledOopHeapMax) {

    // Calc address range within we try to attach (range of possible start addresses).
    char* const highest_start = align_down((char *)UnscaledOopHeapMax - size, attach_point_alignment);
    char* const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
    reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
                                 aligned_heap_base_min_address, (char *)UnscaledOopHeapMax, size, alignment, page_size);
    uintptr_t const highest_start = align_down(UnscaledOopHeapMax - size, attach_point_alignment);
    uintptr_t const lowest_start = align_up(aligned_heap_base_min_address, attach_point_alignment);
    assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT,
           lowest_start, highest_start);
    reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
                                 (char*)aligned_heap_base_min_address, (char*)UnscaledOopHeapMax, size, alignment, page_size);
  }

  // zerobased: Attempt to allocate in the lower 32G.
  char *zerobased_max = (char *)OopEncodingHeapMax;
  const uintptr_t zerobased_max = OopEncodingHeapMax;

  // Give it several tries from top of range to bottom.
  if (aligned_heap_base_min_address + size <= zerobased_max && // Zerobased theoretical possible.
      ((!reserved.is_reserved()) ||                            // No previous try succeeded.
       (reserved.end() > zerobased_max))) {                    // Unscaled delivered an arbitrary address.
       (reserved.end() > (char*)zerobased_max))) {             // Unscaled delivered an arbitrary address.

    // Release previous reservation
    release(reserved);

    // Calc address range within we try to attach (range of possible start addresses).
    char *const highest_start = align_down(zerobased_max - size, attach_point_alignment);
    uintptr_t const highest_start = align_down(zerobased_max - size, attach_point_alignment);
    // Need to be careful about size being guaranteed to be less
    // than UnscaledOopHeapMax due to type constraints.
    char *lowest_start = aligned_heap_base_min_address;
    uint64_t unscaled_end = UnscaledOopHeapMax - size;
    if (unscaled_end < UnscaledOopHeapMax) { // unscaled_end wrapped if size is large
      lowest_start = MAX2(lowest_start, (char*)unscaled_end);
    uintptr_t lowest_start = aligned_heap_base_min_address;
    if (size < UnscaledOopHeapMax) {
      lowest_start = MAX2<uintptr_t>(lowest_start, UnscaledOopHeapMax - size);
    }
    lowest_start = align_up(lowest_start, attach_point_alignment);
    reserved = try_reserve_range(highest_start, lowest_start, attach_point_alignment,
                                 aligned_heap_base_min_address, zerobased_max, size, alignment, page_size);
    assert(lowest_start <= highest_start, "lowest: " INTPTR_FORMAT " highest: " INTPTR_FORMAT,
           lowest_start, highest_start);
    reserved = try_reserve_range((char*)highest_start, (char*)lowest_start, attach_point_alignment,
                                 (char*)aligned_heap_base_min_address, (char*)zerobased_max, size, alignment, page_size);
  }

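The rewritten lower-bound computation hinges on the size < UnscaledOopHeapMax guard: with unsigned arithmetic, UnscaledOopHeapMax - size wraps around when size is larger, which is exactly what the old after-the-fact wrap check was detecting. A small sketch of the hazard, using an illustrative 4G constant:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t UnscaledOopHeapMax = 1ULL << 32; // 4G, as for 32-bit unscaled oops
  uint64_t size = 6ULL << 30;                     // 6G heap: larger than the max

  // Unguarded subtraction wraps: the "lower bound" becomes enormous.
  uint64_t wrapped = UnscaledOopHeapMax - size;
  printf("wrapped bound: 0x%llx\n", (unsigned long long)wrapped);

  // The guarded form only uses the difference when it is meaningful.
  uint64_t lowest_start = 0;
  if (size < UnscaledOopHeapMax) {
    uint64_t bound = UnscaledOopHeapMax - size;
    if (bound > lowest_start) lowest_start = bound;
  }
  printf("guarded bound: 0x%llx\n", (unsigned long long)lowest_start);
  return 0;
}
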
// Now we go for heaps with base != 0. We need a noaccess prefix to efficiently
@ -616,17 +619,17 @@ ReservedHeapSpace HeapReserver::Instance::reserve_compressed_oops_heap(const siz
  // Try to attach at addresses that are aligned to OopEncodingHeapMax. Disjointbase mode.
  char** addresses = get_attach_addresses_for_disjoint_mode();
  int i = 0;
  while ((addresses[i] != nullptr) &&          // End of array not yet reached.
         ((!reserved.is_reserved()) ||         // No previous try succeeded.
          (reserved.end() > zerobased_max &&   // Not zerobased or unscaled address.
                                               // Not disjoint address.
  while ((addresses[i] != nullptr) &&          // End of array not yet reached.
         ((!reserved.is_reserved()) ||         // No previous try succeeded.
          (reserved.end() > (char*)zerobased_max && // Not zerobased or unscaled address.
                                               // Not disjoint address.
           !CompressedOops::is_disjoint_heap_base_address((address)reserved.base())))) {

    // Release previous reservation
    release(reserved);

    char* const attach_point = addresses[i];
    assert(attach_point >= aligned_heap_base_min_address, "Flag support broken");
    assert((uintptr_t)attach_point >= aligned_heap_base_min_address, "Flag support broken");
    reserved = try_reserve_memory(size + noaccess_prefix, alignment, page_size, attach_point);
    i++;
  }

@ -326,7 +326,7 @@ bool RegionNode::is_unreachable_region(const PhaseGVN* phase) {

  // First, cut the simple case of fallthrough region when NONE of
  // region's phis references itself directly or through a data node.
  if (is_possible_unsafe_loop(phase)) {
  if (is_possible_unsafe_loop()) {
    // If we have a possible unsafe loop, check if the region node is actually unreachable from root.
    if (is_unreachable_from_root(phase)) {
      _is_unreachable_region = true;
@ -336,7 +336,7 @@ bool RegionNode::is_unreachable_region(const PhaseGVN* phase) {
  return false;
}

bool RegionNode::is_possible_unsafe_loop(const PhaseGVN* phase) const {
bool RegionNode::is_possible_unsafe_loop() const {
  uint max = outcnt();
  uint i;
  for (i = 0; i < max; i++) {
@ -634,8 +634,8 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    }
  } else if (can_reshape && cnt == 1) {
    // Is it dead loop?
    // If it is LoopNopde it had 2 (+1 itself) inputs and
    // one of them was cut. The loop is dead if it was EntryContol.
    // If it is LoopNode it had 2 (+1 itself) inputs and
    // one of them was cut. The loop is dead if it was EntryControl.
    // Loop node may have only one input because entry path
    // is removed in PhaseIdealLoop::Dominators().
    assert(!this->is_Loop() || cnt_orig <= 3, "Loop node should have 3 or less inputs");
@ -1392,7 +1392,7 @@ bool PhiNode::try_clean_memory_phi(PhaseIterGVN* igvn) {
  }
  assert(is_diamond_phi() > 0, "sanity");
  assert(req() == 3, "same as region");
  const Node* region = in(0);
  RegionNode* region = in(0)->as_Region();
  for (uint i = 1; i < 3; i++) {
    Node* phi_input = in(i);
    if (phi_input != nullptr && phi_input->is_MergeMem() && region->in(i)->outcnt() == 1) {
@ -1400,8 +1400,9 @@ bool PhiNode::try_clean_memory_phi(PhaseIterGVN* igvn) {
      MergeMemNode* merge_mem = phi_input->as_MergeMem();
      uint j = 3 - i;
      Node* other_phi_input = in(j);
      if (other_phi_input != nullptr && other_phi_input == merge_mem->base_memory()) {
      if (other_phi_input != nullptr && other_phi_input == merge_mem->base_memory() && !is_data_loop(region, phi_input, igvn)) {
        // merge_mem is a successor memory to other_phi_input, and is not pinned inside the diamond, so push it out.
        // Only proceed if the transformation doesn't create a data loop
        // This will allow the diamond to collapse completely if there are no other phis left.
        igvn->replace_node(this, merge_mem);
        return true;

@ -84,7 +84,7 @@ private:
  bool _is_unreachable_region;
  LoopStatus _loop_status;

  bool is_possible_unsafe_loop(const PhaseGVN* phase) const;
  bool is_possible_unsafe_loop() const;
  bool is_unreachable_from_root(const PhaseGVN* phase) const;
public:
  // Node layout (parallels PhiNode):

@ -1471,6 +1471,65 @@ static OptoReg::Name find_first_set(LRG& lrg, RegMask& mask) {
  return assigned;
}

OptoReg::Name PhaseChaitin::select_bias_lrg_color(LRG& lrg) {
  uint bias_lrg1_idx = _lrg_map.find(lrg._copy_bias);
  uint bias_lrg2_idx = _lrg_map.find(lrg._copy_bias2);

  // If bias_lrg1 has a color
  if (bias_lrg1_idx != 0 && !_ifg->_yanked->test(bias_lrg1_idx)) {
    OptoReg::Name reg = lrgs(bias_lrg1_idx).reg();
    // and it is legal for lrg
    if (is_legal_reg(lrg, reg)) {
      return reg;
    }
  }

  // If bias_lrg2 has a color
  if (bias_lrg2_idx != 0 && !_ifg->_yanked->test(bias_lrg2_idx)) {
    OptoReg::Name reg = lrgs(bias_lrg2_idx).reg();
    // and it is legal for lrg
    if (is_legal_reg(lrg, reg)) {
      return reg;
    }
  }

  uint bias_lrg_idx = 0;
  if (bias_lrg1_idx != 0 && bias_lrg2_idx != 0) {
    // Since none of the bias live ranges are part of the IFG yet, constrain the
    // definition mask with the bias live range with the least degrees of
    // freedom. This will increase the chances of register sharing once the bias
    // live range becomes part of the IFG.
    lrgs(bias_lrg1_idx).compute_set_mask_size();
    lrgs(bias_lrg2_idx).compute_set_mask_size();
    bias_lrg_idx = lrgs(bias_lrg1_idx).degrees_of_freedom() >
                   lrgs(bias_lrg2_idx).degrees_of_freedom()
                       ? bias_lrg2_idx
                       : bias_lrg1_idx;
  } else if (bias_lrg1_idx != 0) {
    bias_lrg_idx = bias_lrg1_idx;
  } else if (bias_lrg2_idx != 0) {
    bias_lrg_idx = bias_lrg2_idx;
  }

  // Register masks with offset exclude all mask bits before the offset.
  // Such masks are mainly used for allocation from stack slots. Constrain the
  // register mask of the definition live range using the bias mask only if
  // both masks have zero offset.
  if (bias_lrg_idx != 0 && !lrg.mask().is_offset() &&
      !lrgs(bias_lrg_idx).mask().is_offset()) {
    // Choose a color which is legal for bias_lrg
    ResourceMark rm(C->regmask_arena());
    RegMask tempmask(lrg.mask(), C->regmask_arena());
    tempmask.and_with(lrgs(bias_lrg_idx).mask());
    tempmask.clear_to_sets(lrg.num_regs());
    OptoReg::Name reg = find_first_set(lrg, tempmask);
    if (OptoReg::is_valid(reg)) {
      return reg;
    }
  }
  return OptoReg::Bad;
}

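When both bias candidates are still uncolored, the code narrows the definition's mask by the candidate with fewer degrees of freedom, betting that the more constrained live range is the harder one to share a register with later. A toy sketch of that tie-break, with fixed-width bitmasks standing in for RegMask and popcount for degrees_of_freedom():

#include <bitset>
#include <cstdio>

int main() {
  // 16-register masks; a set bit means the register is allowed.
  std::bitset<16> def_mask  ("1111111111111111");
  std::bitset<16> bias1_mask("0000000011111111"); // 8 options
  std::bitset<16> bias2_mask("0000000000000011"); // 2 options: more constrained

  // Prefer the bias with fewer degrees of freedom: once it gets colored,
  // register sharing is more likely if we pre-constrained toward it.
  std::bitset<16> chosen = (bias1_mask.count() > bias2_mask.count())
                               ? bias2_mask : bias1_mask;
  std::bitset<16> constrained = def_mask & chosen;
  // Pick the first set bit as the biased color.
  for (size_t r = 0; r < constrained.size(); r++) {
    if (constrained.test(r)) { printf("biased color: r%zu\n", r); break; }
  }
  return 0;
}
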
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {

@ -1492,25 +1551,10 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
    }
  }

  uint copy_lrg = _lrg_map.find(lrg._copy_bias);
  if (copy_lrg != 0) {
    // If he has a color,
    if(!_ifg->_yanked->test(copy_lrg)) {
      OptoReg::Name reg = lrgs(copy_lrg).reg();
      // And it is legal for you,
      if (is_legal_reg(lrg, reg)) {
        return reg;
      }
    } else if (!lrg.mask().is_offset()) {
      // Choose a color which is legal for him
      ResourceMark rm(C->regmask_arena());
      RegMask tempmask(lrg.mask(), C->regmask_arena());
      tempmask.and_with(lrgs(copy_lrg).mask());
      tempmask.clear_to_sets(lrg.num_regs());
      OptoReg::Name reg = find_first_set(lrg, tempmask);
      if (OptoReg::is_valid(reg))
        return reg;
    }
  // Try biasing the color with non-interfering bias live range[s].
  OptoReg::Name reg = select_bias_lrg_color(lrg);
  if (OptoReg::is_valid(reg)) {
    return reg;
  }

  // If no bias info exists, just go with the register selection ordering
@ -1524,7 +1568,7 @@ OptoReg::Name PhaseChaitin::bias_color(LRG& lrg) {
  // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
  // copy removal to remove many more copies, by preventing a just-assigned
  // register from being repeatedly assigned.
  OptoReg::Name reg = lrg.mask().find_first_elem();
  reg = lrg.mask().find_first_elem();
  if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
    // This 'Remove; find; Insert' idiom is an expensive way to find the
    // SECOND element in the mask.
@ -1640,6 +1684,27 @@ uint PhaseChaitin::Select( ) {
      }
    }
  }

  Node* def = lrg->_def;
  if (lrg->is_singledef() && !lrg->_is_bound && def->is_Mach()) {
    MachNode* mdef = def->as_Mach();
    if (Matcher::is_register_biasing_candidate(mdef, 1)) {
      Node* in1 = mdef->in(mdef->operand_index(1));
      if (in1 != nullptr && lrg->_copy_bias == 0) {
        lrg->_copy_bias = _lrg_map.find(in1);
      }
    }

    // For commutative operations, def allocation can also be
    // biased towards LRG of second input's def.
    if (Matcher::is_register_biasing_candidate(mdef, 2)) {
      Node* in2 = mdef->in(mdef->operand_index(2));
      if (in2 != nullptr && lrg->_copy_bias2 == 0) {
        lrg->_copy_bias2 = _lrg_map.find(in2);
      }
    }
  }

  //assert(is_infinite_stack == lrg->mask().is_infinite_stack(), "nbrs must not change InfiniteStackedness");
  // Aligned pairs need aligned masks
  assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");

@ -63,6 +63,7 @@ public:

  uint _risk_bias;   // Index of LRG which we want to avoid color
  uint _copy_bias;   // Index of LRG which we want to share color
  uint _copy_bias2;  // Index of second LRG which we want to share color

  uint _next;        // Index of next LRG in linked list
  uint _prev;        // Index of prev LRG in linked list
@ -703,6 +704,8 @@ private:
  OptoReg::Name choose_color(LRG& lrg);
  // Helper function which implements biasing heuristic
  OptoReg::Name bias_color(LRG& lrg);
  // Helper function which implements color biasing
  OptoReg::Name select_bias_lrg_color(LRG& lrg);

  // Split uncolorable live ranges
  // Return new number of live ranges

@ -35,6 +35,97 @@

#ifndef PRODUCT

// Support for printing properties
class PrintProperties
{
private:
  IdealGraphPrinter* _printer;

public:
  PrintProperties(IdealGraphPrinter* printer) : _printer(printer) {}
  void print_node_properties(Node* node);
  void print_lrg_properties(const LRG& lrg, const char* buffer);
  void print_property(int flag, const char* name);
  void print_property(int flag, const char* name, const char* val);
  void print_property(int flag, const char* name, int val);
};

void PrintProperties::print_node_properties(Node* node) {
  const jushort flags = node->flags();
  print_property((flags & Node::Flag_is_Copy), "is_copy");
  print_property((flags & Node::Flag_rematerialize), "rematerialize");
  print_property((flags & Node::Flag_needs_anti_dependence_check), "needs_anti_dependence_check");
  print_property((flags & Node::Flag_is_macro), "is_macro");
  print_property((flags & Node::Flag_is_Con), "is_con");
  print_property((flags & Node::Flag_is_cisc_alternate), "is_cisc_alternate");
  print_property((flags & Node::Flag_is_dead_loop_safe), "is_dead_loop_safe");
  print_property((flags & Node::Flag_may_be_short_branch), "may_be_short_branch");
  print_property((flags & Node::Flag_has_call), "has_call");
  print_property((flags & Node::Flag_has_swapped_edges), "has_swapped_edges");
  Matcher* matcher = _printer->C->matcher();
  if (matcher != nullptr) {
    print_property(matcher->is_shared(node), "is_shared");
    print_property(!(matcher->is_shared(node)), "is_shared", IdealGraphPrinter::FALSE_VALUE);
    print_property(matcher->is_dontcare(node), "is_dontcare");
    print_property(!(matcher->is_dontcare(node)), "is_dontcare", IdealGraphPrinter::FALSE_VALUE);
    Node* old = matcher->find_old_node(node);
    if (old != nullptr) {
      print_property(true, "old_node_idx", old->_idx);
    }
  }
}

void PrintProperties::print_lrg_properties(const LRG &lrg, const char *buffer) {
  print_property(true, "mask", buffer);
  print_property(true, "mask_size", lrg.mask_size());
  if (lrg._degree_valid) {
    print_property(true, "degree", lrg.degree());
  }
  print_property(true, "num_regs", lrg.num_regs());
  print_property(true, "reg_pressure", lrg.reg_pressure());
  print_property(true, "cost", lrg._cost);
  print_property(true, "area", lrg._area);
  print_property(true, "score", lrg.score());
  print_property((lrg._risk_bias != 0), "risk_bias", lrg._risk_bias);
  print_property((lrg._copy_bias != 0), "copy_bias", lrg._copy_bias);
  print_property((lrg._copy_bias2 != 0), "copy_bias2", lrg._copy_bias2);
  print_property(lrg.is_singledef(), "is_singledef");
  print_property(lrg.is_multidef(), "is_multidef");
  print_property(lrg._is_oop, "is_oop");
  print_property(lrg._is_float, "is_float");
  print_property(lrg._is_vector, "is_vector");
  print_property(lrg._is_predicate, "is_predicate");
  print_property(lrg._is_scalable, "is_scalable");
  print_property(lrg._was_spilled1, "was_spilled1");
  print_property(lrg._was_spilled2, "was_spilled2");
  print_property(lrg._direct_conflict, "direct_conflict");
  print_property(lrg._fat_proj, "fat_proj");
  print_property(lrg._was_lo, "_was_lo");
  print_property(lrg._has_copy, "has_copy");
  print_property(lrg._at_risk, "at_risk");
  print_property(lrg._must_spill, "must_spill");
  print_property(lrg._is_bound, "is_bound");
  print_property((lrg._msize_valid && lrg._degree_valid && lrg.lo_degree()), "trivial");
}

void PrintProperties::print_property(int flag, const char* name) {
  if (flag != 0) {
    _printer->print_prop(name, IdealGraphPrinter::TRUE_VALUE);
  }
}

void PrintProperties::print_property(int flag, const char* name, const char* val) {
  if (flag != 0) {
    _printer->print_prop(name, val);
  }
}

void PrintProperties::print_property(int flag, const char* name, int val) {
  if (flag != 0) {
    _printer->print_prop(name, val);
  }
}

// Constants
// Keep consistent with Java constants
const char *IdealGraphPrinter::INDENT = " ";
@ -522,54 +613,8 @@ void IdealGraphPrinter::visit_node(Node* n, bool edges) {
    print_prop("jvms", buffer);
  }

  const jushort flags = node->flags();
  if (flags & Node::Flag_is_Copy) {
    print_prop("is_copy", "true");
  }
  if (flags & Node::Flag_rematerialize) {
    print_prop("rematerialize", "true");
  }
  if (flags & Node::Flag_needs_anti_dependence_check) {
    print_prop("needs_anti_dependence_check", "true");
  }
  if (flags & Node::Flag_is_macro) {
    print_prop("is_macro", "true");
  }
  if (flags & Node::Flag_is_Con) {
    print_prop("is_con", "true");
  }
  if (flags & Node::Flag_is_cisc_alternate) {
    print_prop("is_cisc_alternate", "true");
  }
  if (flags & Node::Flag_is_dead_loop_safe) {
    print_prop("is_dead_loop_safe", "true");
  }
  if (flags & Node::Flag_may_be_short_branch) {
    print_prop("may_be_short_branch", "true");
  }
  if (flags & Node::Flag_has_call) {
    print_prop("has_call", "true");
  }
  if (flags & Node::Flag_has_swapped_edges) {
    print_prop("has_swapped_edges", "true");
  }

  if (C->matcher() != nullptr) {
    if (C->matcher()->is_shared(node)) {
      print_prop("is_shared", "true");
    } else {
      print_prop("is_shared", "false");
    }
    if (C->matcher()->is_dontcare(node)) {
      print_prop("is_dontcare", "true");
    } else {
      print_prop("is_dontcare", "false");
    }
    Node* old = C->matcher()->find_old_node(node);
    if (old != nullptr) {
      print_prop("old_node_idx", old->_idx);
    }
  }
  PrintProperties print_node(this);
  print_node.print_node_properties(node);

  if (node->is_Proj()) {
    print_prop("con", (int)node->as_Proj()->_con);
@ -1145,73 +1190,10 @@ void IdealGraphPrinter::print(const char* name, Node* node, GrowableArray<const
      buffer[0] = 0;
      stringStream lrg_mask_stream(buffer, sizeof(buffer) - 1);
      lrg.mask().dump(&lrg_mask_stream);
      print_prop("mask", buffer);
      print_prop("mask_size", lrg.mask_size());
      if (lrg._degree_valid) {
        print_prop("degree", lrg.degree());
      }
      print_prop("num_regs", lrg.num_regs());
      print_prop("reg_pressure", lrg.reg_pressure());
      print_prop("cost", lrg._cost);
      print_prop("area", lrg._area);
      print_prop("score", lrg.score());
      if (lrg._risk_bias != 0) {
        print_prop("risk_bias", lrg._risk_bias);
      }
      if (lrg._copy_bias != 0) {
        print_prop("copy_bias", lrg._copy_bias);
      }
      if (lrg.is_singledef()) {
        print_prop("is_singledef", TRUE_VALUE);
      }
      if (lrg.is_multidef()) {
        print_prop("is_multidef", TRUE_VALUE);
      }
      if (lrg._is_oop) {
        print_prop("is_oop", TRUE_VALUE);
      }
      if (lrg._is_float) {
        print_prop("is_float", TRUE_VALUE);
      }
      if (lrg._is_vector) {
        print_prop("is_vector", TRUE_VALUE);
      }
      if (lrg._is_predicate) {
        print_prop("is_predicate", TRUE_VALUE);
      }
      if (lrg._is_scalable) {
        print_prop("is_scalable", TRUE_VALUE);
      }
      if (lrg._was_spilled1) {
        print_prop("was_spilled1", TRUE_VALUE);
      }
      if (lrg._was_spilled2) {
        print_prop("was_spilled2", TRUE_VALUE);
      }
      if (lrg._direct_conflict) {
        print_prop("direct_conflict", TRUE_VALUE);
      }
      if (lrg._fat_proj) {
        print_prop("fat_proj", TRUE_VALUE);
      }
      if (lrg._was_lo) {
        print_prop("_was_lo", TRUE_VALUE);
      }
      if (lrg._has_copy) {
        print_prop("has_copy", TRUE_VALUE);
      }
      if (lrg._at_risk) {
        print_prop("at_risk", TRUE_VALUE);
      }
      if (lrg._must_spill) {
        print_prop("must_spill", TRUE_VALUE);
      }
      if (lrg._is_bound) {
        print_prop("is_bound", TRUE_VALUE);
      }
      if (lrg._msize_valid && lrg._degree_valid && lrg.lo_degree()) {
        print_prop("trivial", TRUE_VALUE);
      }

      PrintProperties print_node(this);
      print_node.print_lrg_properties(lrg, buffer);

      tail(PROPERTIES_ELEMENT);
      tail(LIVE_RANGE_ELEMENT);
    }

@ -46,8 +46,9 @@ class ConnectionGraph;
class Parse;

class IdealGraphPrinter : public CHeapObj<mtCompiler> {
private:
  friend class PrintProperties;

private:
  static const char *INDENT;
  static const char *TOP_ELEMENT;
  static const char *GROUP_ELEMENT;

@ -460,6 +460,13 @@ int MachNode::operand_index(Node* def) const {
  return -1;
}

int MachNode::operand_num_edges(uint oper_index) const {
  if (num_opnds() > oper_index) {
    return _opnds[oper_index]->num_edges();
  }
  return 0;
}

//------------------------------peephole---------------------------------------
// Apply peephole rule(s) to this instruction
int MachNode::peephole(Block *block, int block_index, PhaseCFG* cfg_, PhaseRegAlloc *ra_) {

@ -266,6 +266,7 @@ public:
  int operand_index(uint operand) const;
  int operand_index(const MachOper *oper) const;
  int operand_index(Node* m) const;
  int operand_num_edges(uint operand) const;

  // Register class input is expected in
  virtual const RegMask &in_RegMask(uint) const;

@ -512,6 +512,8 @@ public:
  DEBUG_ONLY( bool verify_after_postselect_cleanup(); )

public:
  static bool is_register_biasing_candidate(const MachNode* mdef, int oper_index);

  // This routine is run whenever a graph fails to match.
  // If it returns, the compiler should bailout to interpreter without error.
  // In non-product mode, SoftMatchFailure is false to detect non-canonical

@ -828,26 +828,26 @@ public:
#undef DEFINE_CLASS_ID

  // Flags are sorted by usage frequency.
  enum NodeFlags {
    Flag_is_Copy                     = 1 << 0, // should be first bit to avoid shift
    Flag_rematerialize               = 1 << 1,
    Flag_needs_anti_dependence_check = 1 << 2,
    Flag_is_macro                    = 1 << 3,
    Flag_is_Con                      = 1 << 4,
    Flag_is_cisc_alternate           = 1 << 5,
    Flag_is_dead_loop_safe           = 1 << 6,
    Flag_may_be_short_branch         = 1 << 7,
    Flag_avoid_back_to_back_before   = 1 << 8,
    Flag_avoid_back_to_back_after    = 1 << 9,
    Flag_has_call                    = 1 << 10,
    Flag_has_swapped_edges           = 1 << 11,
    Flag_is_scheduled                = 1 << 12,
    Flag_is_expensive                = 1 << 13,
    Flag_is_predicated_vector        = 1 << 14,
    Flag_for_post_loop_opts_igvn     = 1 << 15,
    Flag_for_merge_stores_igvn       = 1 << 16,
    Flag_is_removed_by_peephole      = 1 << 17,
    Flag_is_predicated_using_blend   = 1 << 18,
  enum NodeFlags : uint64_t {
    Flag_is_Copy                     = 1ULL << 0, // should be first bit to avoid shift
    Flag_rematerialize               = 1ULL << 1,
    Flag_needs_anti_dependence_check = 1ULL << 2,
    Flag_is_macro                    = 1ULL << 3,
    Flag_is_Con                      = 1ULL << 4,
    Flag_is_cisc_alternate           = 1ULL << 5,
    Flag_is_dead_loop_safe           = 1ULL << 6,
    Flag_may_be_short_branch         = 1ULL << 7,
    Flag_avoid_back_to_back_before   = 1ULL << 8,
    Flag_avoid_back_to_back_after    = 1ULL << 9,
    Flag_has_call                    = 1ULL << 10,
    Flag_has_swapped_edges           = 1ULL << 11,
    Flag_is_scheduled                = 1ULL << 12,
    Flag_is_expensive                = 1ULL << 13,
    Flag_is_predicated_vector        = 1ULL << 14,
    Flag_for_post_loop_opts_igvn     = 1ULL << 15,
    Flag_for_merge_stores_igvn       = 1ULL << 16,
    Flag_is_removed_by_peephole      = 1ULL << 17,
    Flag_is_predicated_using_blend   = 1ULL << 18,
    _last_flag                       = Flag_is_predicated_using_blend
  };


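The enum gains an explicit uint64_t underlying type so that flag bits at position 31 and beyond stay well-defined: with plain int math, 1 << 31 is signed overflow and higher bits are unreachable. A minimal illustration of why the 1ULL shifts matter (the bit positions are illustrative, not real Node flags):

#include <cstdint>
#include <cstdio>

int main() {
  // With 32-bit int math, shifting 1 into bit 31 is signed overflow (UB),
  // and bits 32+ are impossible. The 1ULL form stays well-defined to bit 63.
  uint64_t flag31 = 1ULL << 31;
  uint64_t flag40 = 1ULL << 40; // only representable with a 64-bit flag type
  uint64_t flags = flag31 | flag40;
  printf("flag40 set: %d\n", (int)((flags & flag40) != 0));
  return 0;
}
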
@ -1022,27 +1022,39 @@ bool VPointer::can_make_speculative_aliasing_check_with(const VPointer& other) c
|
||||
// or at the multiversion_if. That is before the pre-loop. From the construction of
|
||||
// VPointer, we already know that all its variables (except iv) are pre-loop invariant.
|
||||
//
|
||||
// For the computation of main_init, we also need the pre_limit, and so we need
|
||||
// to check that this value is pre-loop invariant. In the case of non-equal iv_scales,
|
||||
// we also need the main_limit in the aliasing check, and so this value must then
|
||||
// also be pre-loop invariant.
|
||||
// In VPointer::make_speculative_aliasing_check_with we compute main_init in all
|
||||
// cases. For this, we require pre_init and pre_limit. These values must be available
|
||||
// for the speculative check, i.e. their control must dominate the speculative check.
|
||||
// Further, "if vp1.iv_scale() != vp2.iv_scale()" we additionally need to have
|
||||
// main_limit available for the speculative check.
|
||||
// Note: no matter if the speculative check is inserted as a predicate or at the
|
||||
// multiversion if, the speculative check happens before (dominates) the
|
||||
// pre-loop.
|
||||
Node* pre_init = _vloop.pre_loop_end()->init_trip();
|
||||
Opaque1Node* pre_limit_opaq = _vloop.pre_loop_end()->limit()->as_Opaque1();
|
||||
Node* pre_limit = pre_limit_opaq->in(1);
|
||||
Node* main_limit = _vloop.cl()->limit();
|
||||
|
||||
if (!_vloop.is_pre_loop_invariant(pre_limit)) {
|
||||
if (!_vloop.is_available_for_speculative_check(pre_init)) {
|
||||
#ifdef ASSERT
|
||||
if (_vloop.is_trace_speculative_aliasing_analysis()) {
|
||||
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not pre-loop independent!");
|
||||
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not available at speculative check!");
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
if (!_vloop.is_available_for_speculative_check(pre_limit)) {
|
||||
#ifdef ASSERT
|
||||
if (_vloop.is_trace_speculative_aliasing_analysis()) {
|
||||
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: pre_limit is not available at speculative check!");
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_pre_loop_invariant(main_limit)) {
|
||||
if (vp1.iv_scale() != vp2.iv_scale() && !_vloop.is_available_for_speculative_check(main_limit)) {
|
||||
#ifdef ASSERT
|
||||
if (_vloop.is_trace_speculative_aliasing_analysis()) {
|
||||
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not pre-loop independent!");
|
||||
tty->print_cr("VPointer::can_make_speculative_aliasing_check_with: main_limit is not available at speculative check!");
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
@ -1119,6 +1131,8 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
|
||||
Node* pre_limit = pre_limit_opaq->in(1);
|
||||
assert(_vloop.is_pre_loop_invariant(pre_init), "needed for aliasing check before pre-loop");
|
||||
assert(_vloop.is_pre_loop_invariant(pre_limit), "needed for aliasing check before pre-loop");
|
||||
assert(_vloop.is_available_for_speculative_check(pre_init), "ctrl must be early enough to avoid cycles");
|
||||
assert(_vloop.is_available_for_speculative_check(pre_limit), "ctrl must be early enough to avoid cycles");
|
||||
|
||||
Node* pre_initL = new ConvI2LNode(pre_init);
|
||||
Node* pre_limitL = new ConvI2LNode(pre_limit);
|
||||
@ -1180,6 +1194,7 @@ BoolNode* VPointer::make_speculative_aliasing_check_with(const VPointer& other,
|
||||
jint main_iv_stride = _vloop.iv_stride();
|
||||
Node* main_limit = _vloop.cl()->limit();
|
||||
assert(_vloop.is_pre_loop_invariant(main_limit), "needed for aliasing check before pre-loop");
|
||||
assert(_vloop.is_available_for_speculative_check(main_limit), "ctrl must be early enough to avoid cycles");
|
||||
|
||||
Node* main_limitL = new ConvI2LNode(main_limit);
|
||||
phase->register_new_node_with_ctrl_of(main_limitL, pre_init);
|
||||
|
@ -236,6 +236,8 @@ public:
  // Some nodes must be pre-loop invariant, so that they can be used for conditions
  // before or inside the pre-loop. For example, alignment of main-loop vector
  // memops must be achieved in the pre-loop, via the exit check in the pre-loop.
  // Note: this condition is NOT strong enough for speculative checks, those happen
  //       before the pre-loop. See is_available_for_speculative_check.
  bool is_pre_loop_invariant(Node* n) const {
    // Must be in the main-loop, otherwise we can't access the pre-loop.
    // This fails during SuperWord::unrolling_analysis, but that is ok.
@ -257,6 +259,28 @@ public:
    return is_before_pre_loop(early);
  }

  // Nodes that are to be used in speculative checks must be available early enough.
  // Note: the speculative check happens before the pre-loop, either at the auto
  //       vectorization predicate or the multiversion if. This is before the
  //       pre-loop, and thus the condition here is stronger than the one from
  //       is_pre_loop_invariant.
  bool is_available_for_speculative_check(Node* n) const {
    assert(are_speculative_checks_possible(), "meaningless without speculative check");
    ParsePredicateSuccessProj* parse_predicate_proj = auto_vectorization_parse_predicate_proj();
    // Find the control of the predicate:
    ProjNode* proj = (parse_predicate_proj != nullptr) ? parse_predicate_proj : multiversioning_fast_proj();
    Node* check_ctrl = proj->in(0)->as_If()->in(0);

    // Often, the control of n already dominates that of the predicate.
    Node* n_ctrl = phase()->get_ctrl(n);
    if (phase()->is_dominator(n_ctrl, check_ctrl)) { return true; }

    // But in some cases, the ctrl of n is after that of the predicate,
    // but the early ctrl is before the predicate.
    Node* n_early = phase()->compute_early_ctrl(n, n_ctrl);
    return phase()->is_dominator(n_early, check_ctrl);
  }
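The availability test above boils down to dominance queries. For illustration only (this is not how C2 stores its dominator information), a dominance query can be answered in O(1) after numbering the dominator tree with DFS entry/exit times: a dominates b iff a's interval encloses b's.

#include <vector>
#include <cassert>

struct DomTree {
  std::vector<std::vector<int>> kids;
  std::vector<int> in, out;
  int clock = 0;

  explicit DomTree(int n) : kids(n), in(n), out(n) {}
  void add_edge(int idom, int n) { kids[idom].push_back(n); }

  void number(int n) {                 // DFS over the dominator tree
    in[n] = clock++;
    for (int k : kids[n]) number(k);
    out[n] = clock++;
  }
  bool dominates(int a, int b) const { // interval enclosure test
    return in[a] <= in[b] && out[b] <= out[a];
  }
};

int main() {
  DomTree t(4);            // 0 -> 1 -> 2, and 0 -> 3
  t.add_edge(0, 1); t.add_edge(1, 2); t.add_edge(0, 3);
  t.number(0);
  assert(t.dominates(0, 2) && t.dominates(1, 2) && !t.dominates(3, 2));
  return 0;
}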

  // Check if the loop passes some basic preconditions for vectorization.
  // Return indicates if analysis succeeded.
  bool check_preconditions();

@ -40,38 +40,76 @@ void VTransformGraph::add_vtnode(VTransformNode* vtnode) {
  }                                                                        \
  )

// This is similar to IGVN optimization. But we are a bit lazy, and don't care about
// notification / worklist, since the list of nodes is rather small, and we don't
// expect optimizations that trickle over the whole graph.
void VTransformGraph::optimize(VTransform& vtransform) {
  TRACE_OPTIMIZE( tty->print_cr("\nVTransformGraph::optimize"); )

  bool progress = true;
  DEBUG_ONLY(int pass_count = 0;)
  while (progress) {
    progress = false;
    assert(++pass_count < 10, "ensure we do not have endless loops");
    for (int i = 0; i < _vtnodes.length(); i++) {
      VTransformNode* vtn = _vtnodes.at(i);
      if (!vtn->is_alive()) { continue; }
      progress |= vtn->optimize(_vloop_analyzer, vtransform);

      // Nodes that have no use any more are dead.
      if (vtn->out_strong_edges() == 0 &&
          // There are some exceptions:
          // 1. Memory phi uses are not modeled, so they appear to have no use here, but must be kept alive.
          // 2. Similarly, some stores may not have their memory uses modeled, but need to be kept alive.
          // 3. Outer node with strong inputs: is a use after the loop that we must keep alive.
          !(vtn->isa_PhiScalar() != nullptr ||
            vtn->is_load_or_store_in_loop() ||
            (vtn->isa_Outer() != nullptr && vtn->has_strong_in_edge()))) {
        vtn->mark_dead();
        progress = true;
      }
    }
void VTransformOptimize::worklist_push(VTransformNode* vtn) {
  if (!_worklist_set.test_set(vtn->_idx)) {
    _worklist.push(vtn);
  }
}

VTransformNode* VTransformOptimize::worklist_pop() {
  VTransformNode* vtn = _worklist.pop();
  _worklist_set.remove(vtn->_idx);
  return vtn;
}
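The push/pop pair above pairs a stack with a membership set so each node sits on the worklist at most once. A standalone sketch of the same idiom (hypothetical types; VectorSet approximated with std::vector<bool>):

#include <vector>
#include <cassert>

struct Worklist {
  std::vector<int> stack;        // pending node ids
  std::vector<bool> on_list;     // membership, to avoid duplicate entries

  explicit Worklist(int max_idx) : on_list(max_idx, false) {}

  void push(int idx) {
    if (!on_list[idx]) {         // test-and-set: push each node only once
      on_list[idx] = true;
      stack.push_back(idx);
    }
  }
  int pop() {
    int idx = stack.back();
    stack.pop_back();
    on_list[idx] = false;        // may legitimately be re-pushed later
    return idx;
  }
  bool empty() const { return stack.empty(); }
};

int main() {
  Worklist wl(10);
  wl.push(3); wl.push(3);        // duplicate push is ignored
  assert(wl.pop() == 3 && wl.empty());
  return 0;
}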

void VTransform::optimize() {
  NOT_PRODUCT( if (vloop().is_trace_optimization()) { tty->print_cr("\nVTransform::optimize"); } )
  ResourceMark rm;
  VTransformOptimize vtoptimize(_vloop_analyzer, *this);
  vtoptimize.optimize();
}

void VTransformOptimize::optimize() {
  // Initialize: push all nodes to worklist.
  for (int i = 0; i < _vtransform.graph().vtnodes().length(); i++) {
    VTransformNode* vtn = _vtransform.graph().vtnodes().at(i);
    worklist_push(vtn);
  }

  // We don't want to iterate too many times. We set some arbitrary limit,
  // just to catch infinite loops.
  DEBUG_ONLY( int allowed_steps = 100 * _worklist.length(); )

  // Optimize iteratively.
  while (_worklist.is_nonempty()) {
    VTransformNode* vtn = worklist_pop();
    optimize_step(vtn);
    assert(--allowed_steps > 0, "no endless loop");
  }

  DEBUG_ONLY( verify(); )
}

#ifdef ASSERT
void VTransformOptimize::verify() {
  for (int i = 0; i < _vtransform.graph().vtnodes().length(); i++) {
    VTransformNode* vtn = _vtransform.graph().vtnodes().at(i);
    assert(!optimize_step(vtn), "Missed optimization during VTransform::optimize for %s", vtn->name());
    assert(_worklist.is_empty(), "vtnode on worklist despite no progress for %s", vtn->name());
  }
}
#endif

// Return true if (and only if) we made progress.
bool VTransformOptimize::optimize_step(VTransformNode* vtn) {
  if (!vtn->is_alive()) { return false; }
  bool progress = vtn->optimize(*this);

  // Nodes that have no use any more are dead.
  if (vtn->out_strong_edges() == 0 &&
      // There are some exceptions:
      // 1. Memory phi uses are not modeled, so they appear to have no use here, but must be kept alive.
      // 2. Similarly, some stores may not have their memory uses modeled, but need to be kept alive.
      // 3. Outer node with strong inputs: is a use after the loop that we must keep alive.
      !(vtn->isa_PhiScalar() != nullptr ||
        vtn->is_load_or_store_in_loop() ||
        (vtn->isa_Outer() != nullptr && vtn->has_strong_in_edge()))) {
    vtn->mark_dead(*this);
    return true;
  }
  return progress;
}
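The dead-node rule in optimize_step cascades: killing a node removes the last use of its inputs, which may make those dead in turn, so the inputs are revisited. A standalone sketch of that cascade on a hypothetical graph:

#include <vector>

struct Node {
  bool alive = true;
  int uses = 0;                      // count of strong out-edges
  std::vector<Node*> inputs;
};

void mark_dead(Node* n, std::vector<Node*>& worklist) {
  n->alive = false;
  for (Node* in : n->inputs) {
    if (in != nullptr) {
      in->uses--;                    // this edge disappears
      worklist.push_back(in);        // re-examine: it may be dead now
    }
  }
  n->inputs.clear();
}

void prune(std::vector<Node*> worklist) {
  while (!worklist.empty()) {
    Node* n = worklist.back(); worklist.pop_back();
    if (n->alive && n->uses == 0) {
      mark_dead(n, worklist);        // cascades up the input chains
    }
  }
}

int main() {
  Node a, b;                         // b is a's only input
  a.inputs.push_back(&b);
  b.uses = 1;
  prune({ &a });                     // a has no uses: kill a, then b
  return (a.alive || b.alive) ? 1 : 0;
}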

// Compute a linearization of the graph. We do this with a reverse-post-order of a DFS.
// This only works if the graph is a directed acyclic graph (DAG). The C2 graph, and
// the VLoopDependencyGraph are both DAGs, but after introduction of vectors/packs, the
@ -1141,8 +1179,8 @@ VTransformApplyResult VTransformBoolVectorNode::apply(VTransformApplyState& appl
  return VTransformApplyResult::make_vector(vn);
}

bool VTransformReductionVectorNode::optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) {
  return optimize_move_non_strict_order_reductions_out_of_loop(vloop_analyzer, vtransform);
bool VTransformReductionVectorNode::optimize(VTransformOptimize& vtoptimize) {
  return optimize_move_non_strict_order_reductions_out_of_loop(vtoptimize);
}

int VTransformReductionVectorNode::vector_reduction_opcode() const {
@ -1213,7 +1251,7 @@ bool VTransformReductionVectorNode::requires_strict_order() const {
// become profitable, since the expensive reduction node is moved
// outside the loop, and instead cheaper element-wise vector accumulations
// are performed inside the loop.
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop_preconditions(VTransform& vtransform) {
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop_preconditions(const VTransform& vtransform) {
  // We have a phi with a single use.
  VTransformPhiScalarNode* phi = in_req(1)->isa_PhiScalar();
  if (phi == nullptr) {
@ -1260,13 +1298,13 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
        current_red->element_basic_type() != bt ||
        current_red->vector_length() != vlen) {
      TRACE_OPTIMIZE(
        tty->print("  Cannot move out of loop, other reduction node does not match:");
        tty->print("  Cannot move out of loop, other reduction node does not match: ");
        print();
        tty->print("  other: ");
        if (current_red != nullptr) {
          current_red->print();
        } else {
          tty->print("nullptr");
          tty->print_cr("nullptr");
        }
      )
      return false; // not compatible
@ -1314,7 +1352,8 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
  return true; // success
}
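For intuition, a standalone sketch of what moving a non-strict-order reduction (here: integer add) out of the loop buys: instead of one cross-lane reduction per iteration, the loop keeps independent lane accumulators and reduces across lanes once, after the loop.

#include <cassert>

int sum_scalar(const int* a, int n) {
  int acc = 0;
  for (int i = 0; i < n; i++) acc += a[i];
  return acc;
}

int sum_lanewise(const int* a, int n) {         // n assumed divisible by 4
  int lane[4] = {0, 0, 0, 0};                   // vector of identity values
  for (int i = 0; i < n; i += 4) {              // element-wise accumulation
    for (int l = 0; l < 4; l++) lane[l] += a[i + l];
  }
  return lane[0] + lane[1] + lane[2] + lane[3]; // one reduction, after loop
}

int main() {
  int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  assert(sum_scalar(a, 8) == sum_lanewise(a, 8));
  return 0;
}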

bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) {
bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_out_of_loop(VTransformOptimize& vtoptimize) {
  VTransform& vtransform = vtoptimize.vtransform();
  if (!optimize_move_non_strict_order_reductions_out_of_loop_preconditions(vtransform)) {
    return false;
  }
@ -1328,7 +1367,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
  const uint vlen = vector_length();
  const BasicType bt = element_basic_type();
  const int vopc = VectorNode::opcode(sopc, bt);
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
  PhaseIdealLoop* phase = vtoptimize.vloop_analyzer().vloop().phase();

  // Create a vector of identity values.
  Node* identity = ReductionNode::make_identity_con_scalar(phase->igvn(), sopc, bt);
@ -1341,6 +1380,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
  // Look at old scalar phi.
  VTransformPhiScalarNode* phi_scalar = in_req(1)->isa_PhiScalar();
  PhiNode* old_phi = phi_scalar->node();
  vtoptimize.worklist_push(phi_scalar);
  VTransformNode* init = phi_scalar->in_req(1);

  TRACE_OPTIMIZE(
@ -1354,6 +1394,7 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
  phi_vector->init_req(0, phi_scalar->in_req(0));
  phi_vector->init_req(1, vtn_identity_vector);
  // Note: backedge comes later
  vtoptimize.worklist_push(phi_vector);

  // Traverse down the chain of reductions, and replace them with vector_accumulators.
  VTransformReductionVectorNode* first_red = this;
@ -1365,6 +1406,8 @@ bool VTransformReductionVectorNode::optimize_move_non_strict_order_reductions_ou
    VTransformVectorNode* vector_accumulator = new (vtransform.arena()) VTransformElementWiseVectorNode(vtransform, 3, current_red->properties(), vopc);
    vector_accumulator->init_req(1, current_vector_accumulator);
    vector_accumulator->init_req(2, vector_input);
    vtoptimize.worklist_push(current_red);
    vtoptimize.worklist_push(vector_accumulator);
    TRACE_OPTIMIZE(
      tty->print("  replace ");
      current_red->print();

@ -24,6 +24,7 @@
#ifndef SHARE_OPTO_VTRANSFORM_HPP
#define SHARE_OPTO_VTRANSFORM_HPP

#include "libadt/vectset.hpp"
#include "opto/node.hpp"
#include "opto/vectorization.hpp"
#include "opto/vectornode.hpp"
@ -192,7 +193,6 @@ public:
  const GrowableArray<VTransformNode*>& vtnodes() const { return _vtnodes; }
  const GrowableArray<VTransformNode*>& get_schedule() const { return _schedule; }

  void optimize(VTransform& vtransform);
  bool schedule();
  bool has_store_to_load_forwarding_failure(const VLoopAnalyzer& vloop_analyzer) const;
  float cost_for_vector_loop() const;
@ -257,7 +257,7 @@ public:
  DEBUG_ONLY( bool has_graph() const { return !_graph.is_empty(); } )
  VTransformGraph& graph() { return _graph; }

  void optimize() { return _graph.optimize(*this); }
  void optimize();
  bool schedule() { return _graph.schedule(); }
  bool is_profitable() const;
  float cost_for_vector_loop() const { return _graph.cost_for_vector_loop(); }
@ -291,6 +291,36 @@ private:
  void apply_vectorization() const;
};

// We keep track of the worklist during optimizations.
// The concept is somewhat parallel to IGVN: we keep on
// optimizing vtnodes on the worklist, which may in turn
// add more nodes to the list. We keep on optimizing until
// no more nodes are on the worklist.
class VTransformOptimize : public StackObj {
private:
  const VLoopAnalyzer& _vloop_analyzer;
  VTransform& _vtransform;

  GrowableArray<VTransformNode*> _worklist;
  VectorSet _worklist_set;

public:
  VTransformOptimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) :
    _vloop_analyzer(vloop_analyzer),
    _vtransform(vtransform) {}

  const VLoopAnalyzer& vloop_analyzer() const { return _vloop_analyzer; }
  VTransform& vtransform() { return _vtransform; }

  void worklist_push(VTransformNode* vtn);
  void optimize();

private:
  VTransformNode* worklist_pop();
  bool optimize_step(VTransformNode* vtn);
  DEBUG_ONLY( void verify(); )
};

// Keeps track of the state during "VTransform::apply"
// -> keep track of the already transformed nodes and the memory state.
class VTransformApplyState : public StackObj {
@ -531,10 +561,15 @@ public:

  bool is_alive() const { return _is_alive; }

  void mark_dead() {
  void mark_dead(VTransformOptimize& vtoptimize) {
    _is_alive = false;
    // Remove all inputs
    // Remove all inputs, and put inputs on worklist in
    // case they are also dead.
    for (uint i = 0; i < req(); i++) {
      VTransformNode* in = in_req(i);
      if (in != nullptr) {
        vtoptimize.worklist_push(in);
      }
      set_req(i, nullptr);
    }
  }
@ -558,7 +593,7 @@ public:
  virtual const VPointer& vpointer() const { ShouldNotReachHere(); }
  virtual bool is_loop_head_phi() const { return false; }

  virtual bool optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) { return false; }
  virtual bool optimize(VTransformOptimize& vtoptimize) { return false; }

  virtual float cost(const VLoopAnalyzer& vloop_analyzer) const = 0;

@ -868,7 +903,7 @@ public:
  VTransformReductionVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties) :
    VTransformVectorNode(vtransform, 3, properties) {}
  virtual VTransformReductionVectorNode* isa_ReductionVector() override { return this; }
  virtual bool optimize(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform) override;
  virtual bool optimize(VTransformOptimize& vtoptimize) override;
  virtual float cost(const VLoopAnalyzer& vloop_analyzer) const override;
  virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
  NOT_PRODUCT(virtual const char* name() const override { return "ReductionVector"; };)
@ -876,8 +911,8 @@ public:
private:
  int vector_reduction_opcode() const;
  bool requires_strict_order() const;
  bool optimize_move_non_strict_order_reductions_out_of_loop_preconditions(VTransform& vtransform);
  bool optimize_move_non_strict_order_reductions_out_of_loop(const VLoopAnalyzer& vloop_analyzer, VTransform& vtransform);
  bool optimize_move_non_strict_order_reductions_out_of_loop_preconditions(const VTransform& vtransform);
  bool optimize_move_non_strict_order_reductions_out_of_loop(VTransformOptimize& vtoptimize);
};

class VTransformPhiVectorNode : public VTransformVectorNode {

@ -1478,10 +1478,10 @@ void Arguments::set_conservative_max_heap_alignment() {
  // the alignments imposed by several sources: any requirements from the heap
  // itself and the maximum page size we may run the VM with.
  size_t heap_alignment = GCConfig::arguments()->conservative_max_heap_alignment();
  _conservative_max_heap_alignment = MAX4(heap_alignment,
  _conservative_max_heap_alignment = MAX3(heap_alignment,
                                          os::vm_allocation_granularity(),
                                          os::max_page_size(),
                                          GCArguments::compute_heap_alignment());
                                          os::max_page_size());
  assert(is_power_of_2(_conservative_max_heap_alignment), "Expected to be a power-of-2");
}
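A standalone sketch of the MAX3 computation above, with illustrative constants: the conservative alignment is the largest of several power-of-two constraints, and a max of powers of two is itself a power of two, which the assert relies on.

#include <cstddef>
#include <cassert>

static bool is_power_of_2(size_t v) { return v != 0 && (v & (v - 1)) == 0; }

static size_t max3(size_t a, size_t b, size_t c) {
  size_t m = a > b ? a : b;
  return m > c ? m : c;
}

int main() {
  size_t heap_alignment = 2 * 1024 * 1024;   // e.g. from the GC
  size_t granularity    = 64 * 1024;         // e.g. allocation granularity
  size_t max_page       = 1024 * 1024;       // e.g. largest page size
  size_t conservative   = max3(heap_alignment, granularity, max_page);
  assert(is_power_of_2(conservative) && conservative == 2 * 1024 * 1024);
  return 0;
}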

jint Arguments::set_ergonomics_flags() {
@ -1589,8 +1589,8 @@ void Arguments::set_heap_size() {
  }

  if (UseCompressedOops) {
    size_t heap_end = HeapBaseMinAddress + MaxHeapSize;
    size_t max_coop_heap = max_heap_for_compressed_oops();
    uintptr_t heap_end = HeapBaseMinAddress + MaxHeapSize;
    uintptr_t max_coop_heap = max_heap_for_compressed_oops();

    // Limit the heap size to the maximum possible when using compressed oops
    if (heap_end < max_coop_heap) {
@ -1607,7 +1607,7 @@ void Arguments::set_heap_size() {
      aot_log_info(aot)("UseCompressedOops disabled due to "
          "max heap %zu > compressed oop heap %zu. "
          "Please check the setting of MaxRAMPercentage %5.2f.",
          reasonable_max, max_coop_heap, MaxRAMPercentage);
          reasonable_max, (size_t)max_coop_heap, MaxRAMPercentage);
      FLAG_SET_ERGO(UseCompressedOops, false);
    } else {
      reasonable_max = max_coop_heap;

@ -75,6 +75,7 @@
//   v.release_store(x) -> void
//   v.release_store_fence(x) -> void
//   v.compare_exchange(x, y [, o]) -> T
//   v.exchange(x [, o]) -> T
//
// (2) All atomic types are default constructible.
//
@ -92,7 +93,6 @@
// (3) Atomic pointers and atomic integers additionally provide
//
// member functions:
//   v.exchange(x [, o]) -> T
//   v.add_then_fetch(i [, o]) -> T
//   v.sub_then_fetch(i [, o]) -> T
//   v.fetch_then_add(i [, o]) -> T
@ -102,10 +102,7 @@
// type of i must be signed, or both must be unsigned. Atomic pointers perform
// element arithmetic.
//
// (4) An atomic translated type additionally provides the exchange
// function if its associated atomic decayed type provides that function.
//
// (5) Atomic integers additionally provide
// (4) Atomic integers additionally provide
//
// member functions:
//   v.and_then_fetch(x [, o]) -> T
@ -115,7 +112,7 @@
//   v.fetch_then_or(x [, o]) -> T
//   v.fetch_then_xor(x [, o]) -> T
//
// (6) Atomic pointers additionally provide
// (5) Atomic pointers additionally provide
//
// nested types:
//   ElementType -> std::remove_pointer_t<T>
@ -127,9 +124,6 @@
// stand out a little more when used in surrounding non-atomic code. Without
// the "AtomicAccess::" qualifier, some of those names are easily overlooked.
//
// Atomic bytes don't provide exchange(). This is because that operation
// hasn't been implemented for 1 byte values. That could be changed if needed.
//
// Atomic for 2 byte integers is not supported. This is because atomic
// operations of that size have not been implemented. There haven't been
// required use-cases. Many platforms don't provide hardware support.
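As a rough analogy only (using std::atomic, not the HotSpot classes), the API shape described above can be mirrored like this; the point of the long, explicit names is that atomic accesses stand out in surrounding code:

#include <atomic>
#include <cassert>

template<typename T>
class AtomicLike {                   // hypothetical wrapper, for illustration
  std::atomic<T> _v{};
public:
  T    load_relaxed() const { return _v.load(std::memory_order_relaxed); }
  T    load_acquire() const { return _v.load(std::memory_order_acquire); }
  void release_store(T x)   { _v.store(x, std::memory_order_release); }
  T    exchange(T x)        { return _v.exchange(x); }
  T    compare_exchange(T cmp, T x) {  // returns the witnessed old value
    _v.compare_exchange_strong(cmp, x);
    return cmp;                        // updated by the CAS on failure
  }
};

int main() {
  AtomicLike<int> v;
  v.release_store(1);
  assert(v.compare_exchange(1, 2) == 1 && v.load_relaxed() == 2);
  return 0;
}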

@ -184,15 +178,8 @@ private:

  // Helper base classes, providing various parts of the APIs.
  template<typename T> class CommonCore;
  template<typename T> class SupportsExchange;
  template<typename T> class SupportsArithmetic;

  // Support conditional exchange() for atomic translated types.
  template<typename T> class HasExchange;
  template<typename T> class DecayedHasExchange;
  template<typename Derived, typename T, bool = DecayedHasExchange<T>::value>
  class TranslatedExchange;

public:
  template<typename T, Category = category<T>()>
  class Atomic;
@ -275,15 +262,7 @@ public:
                     atomic_memory_order order = memory_order_conservative) {
    return AtomicAccess::cmpxchg(value_ptr(), compare_value, new_value, order);
  }
};

template<typename T>
class AtomicImpl::SupportsExchange : public CommonCore<T> {
protected:
  explicit SupportsExchange(T value) : CommonCore<T>(value) {}
  ~SupportsExchange() = default;

public:
  T exchange(T new_value,
             atomic_memory_order order = memory_order_conservative) {
    return AtomicAccess::xchg(this->value_ptr(), new_value, order);
@ -291,7 +270,7 @@ public:
};

template<typename T>
class AtomicImpl::SupportsArithmetic : public SupportsExchange<T> {
class AtomicImpl::SupportsArithmetic : public CommonCore<T> {
  // Guarding the AtomicAccess calls with constexpr checking of Offset produces
  // better compile-time error messages.
  template<typename Offset>
@ -311,7 +290,7 @@ class AtomicImpl::SupportsArithmetic : public SupportsExchange<T> {
  }

protected:
  explicit SupportsArithmetic(T value) : SupportsExchange<T>(value) {}
  explicit SupportsArithmetic(T value) : CommonCore<T>(value) {}
  ~SupportsArithmetic() = default;

public:
@ -424,54 +403,8 @@ public:

// Atomic translated type

// Test whether Atomic<T> has exchange().
template<typename T>
class AtomicImpl::HasExchange {
  template<typename Check> static void* test(decltype(&Check::exchange));
  template<typename> static int test(...);
  using test_type = decltype(test<Atomic<T>>(nullptr));
public:
  static constexpr bool value = std::is_pointer_v<test_type>;
};

// Test whether the atomic decayed type associated with T has exchange().
template<typename T>
class AtomicImpl::DecayedHasExchange {
  using Translator = PrimitiveConversions::Translate<T>;
  using Decayed = typename Translator::Decayed;

  // "Unit test" HasExchange<>.
  static_assert(HasExchange<int>::value);
  static_assert(HasExchange<int*>::value);
  static_assert(!HasExchange<char>::value);

public:
  static constexpr bool value = HasExchange<Decayed>::value;
};

// Base class for atomic translated type if atomic decayed type doesn't have
// exchange().
template<typename Derived, typename T, bool>
class AtomicImpl::TranslatedExchange {};

// Base class for atomic translated type if atomic decayed type does have
// exchange().
template<typename Derived, typename T>
class AtomicImpl::TranslatedExchange<Derived, T, true> {
public:
  T exchange(T new_value,
             atomic_memory_order order = memory_order_conservative) {
    return static_cast<Derived*>(this)->exchange_impl(new_value, order);
  }
};

template<typename T>
class AtomicImpl::Atomic<T, AtomicImpl::Category::Translated>
  : public TranslatedExchange<Atomic<T>, T>
{
  // Give TranslatedExchange<> access to exchange_impl() if needed.
  friend class TranslatedExchange<Atomic<T>, T>;

class AtomicImpl::Atomic<T, AtomicImpl::Category::Translated> {
  using Translator = PrimitiveConversions::Translate<T>;
  using Decayed = typename Translator::Decayed;

@ -533,12 +466,7 @@ public:
                             order));
  }

private:
  // Implementation of exchange() if needed.
  // Exclude when not needed, to prevent reference to non-existent function
  // of atomic decayed type if someone explicitly instantiates Atomic<T>.
  template<typename Dep = Decayed, ENABLE_IF(HasExchange<Dep>::value)>
  T exchange_impl(T new_value, atomic_memory_order order) {
  T exchange(T new_value, atomic_memory_order order = memory_order_conservative) {
    return recover(_value.exchange(decay(new_value), order));
  }
};

@ -419,8 +419,8 @@ private:
  struct XchgImpl;

  // Platform-specific implementation of xchg.  Support for sizes
  // of 4, and sizeof(intptr_t) are required.  The class is a function
  // object that must be default constructible, with these requirements:
  // of 1, 4, and 8 are required.  The class is a function object
  // that must be default constructible, with these requirements:
  //
  // - dest is of type T*.
  // - exchange_value is of type T.
@ -635,7 +635,6 @@ inline void AtomicAccess::dec(D volatile* dest, atomic_memory_order order) {
  STATIC_ASSERT(std::is_pointer<D>::value || std::is_integral<D>::value);
  using I = std::conditional_t<std::is_pointer<D>::value, ptrdiff_t, D>;
  // Assumes two's complement integer representation.
#pragma warning(suppress: 4146)
  AtomicAccess::add(dest, I(-1), order);
}

@ -652,7 +651,6 @@ inline D AtomicAccess::sub(D volatile* dest, I sub_value, atomic_memory_order or
  STATIC_ASSERT(sizeof(I) <= sizeof(AddendType));
  AddendType addend = sub_value;
  // Assumes two's complement integer representation.
#pragma warning(suppress: 4146) // In case AddendType is not signed.
  return AtomicAccess::add(dest, -addend, order);
}
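A standalone sketch of the "sub is add of a negated addend" trick above: negating an unsigned value is well-defined modular arithmetic in C++ and produces the two's complement bit pattern of the negative addend (which is exactly what MSVC warning C4146, suppressed above, complains about).

#include <cstdint>
#include <cassert>

uint32_t cell_value;                      // stand-in for the atomic cell

uint32_t add(uint32_t* dest, uint32_t addend) { return *dest += addend; }

uint32_t sub(uint32_t* dest, uint32_t sub_value) {
  return add(dest, -sub_value);           // wraps: -x == 2^32 - x (mod 2^32)
}

int main() {
  cell_value = 10;
  assert(sub(&cell_value, 3) == 7);       // 10 + (2^32 - 3) mod 2^32 == 7
  return 0;
}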

@ -118,8 +118,5 @@ ThreadTotalCPUTimeClosure::~ThreadTotalCPUTimeClosure() {
}

void ThreadTotalCPUTimeClosure::do_thread(Thread* thread) {
  // The default code path (fast_thread_cpu_time()) asserts that
  // pthread_getcpuclockid() and clock_gettime() must return 0. Thus caller
  // must ensure the thread exists and has not terminated.
  _total += os::thread_cpu_time(thread);
}

@ -162,7 +162,7 @@ void JVMFlag::print_on(outputStream* st, bool withComments, bool printRanges) co
//  uintx ThresholdTolerance = 10 {product} {default}
//  size_t TLABSize = 0 {product} {default}
//  uintx SurvivorRatio = 8 {product} {default}
//  double InitialRAMPercentage = 1.562500 {product} {default}
//  double InitialRAMPercentage = 0.000000 {product} {default}
//  ccstr CompileCommandFile = MyFile.cmd {product} {command line}
//  ccstrlist CompileOnly = Method1
//            CompileOnly += Method2 {product} {command line}

@ -664,6 +664,7 @@ void VMError::report(outputStream* st, bool _verbose) {
  BEGIN
  if (MemTracker::enabled() &&
      NmtVirtualMemory_lock != nullptr &&
      _thread != nullptr &&
      NmtVirtualMemory_lock->owned_by_self()) {
    // Manually unlock to avoid reentrancy due to mallocs in detailed mode.
    NmtVirtualMemory_lock->unlock();
@ -1305,7 +1306,7 @@ void VMError::report(outputStream* st, bool _verbose) {
    os::print_signal_handlers(st, buf, sizeof(buf));
    st->cr();

  STEP_IF("Native Memory Tracking", _verbose)
  STEP_IF("Native Memory Tracking", _verbose && _thread != nullptr)
    MemTracker::error_report(st);
    st->cr();

@ -23,7 +23,6 @@
 *
 */

#include "runtime/atomicAccess.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "utilities/spinYield.hpp"
@ -79,10 +78,10 @@

void GenericWaitBarrier::arm(int barrier_tag) {
  assert(barrier_tag != 0, "Pre arm: Should be arming with armed value");
  assert(AtomicAccess::load(&_barrier_tag) == 0,
  assert(_barrier_tag.load_relaxed() == 0,
         "Pre arm: Should not be already armed. Tag: %d",
         AtomicAccess::load(&_barrier_tag));
  AtomicAccess::release_store(&_barrier_tag, barrier_tag);
         _barrier_tag.load_relaxed());
  _barrier_tag.release_store(barrier_tag);

  Cell &cell = tag_to_cell(barrier_tag);
  cell.arm(barrier_tag);
@ -92,9 +91,9 @@ void GenericWaitBarrier::arm(int barrier_tag) {
}

void GenericWaitBarrier::disarm() {
  int barrier_tag = AtomicAccess::load_acquire(&_barrier_tag);
  int barrier_tag = _barrier_tag.load_acquire();
  assert(barrier_tag != 0, "Pre disarm: Should be armed. Tag: %d", barrier_tag);
  AtomicAccess::release_store(&_barrier_tag, 0);
  _barrier_tag.release_store(0);

  Cell &cell = tag_to_cell(barrier_tag);
  cell.disarm(barrier_tag);
@ -121,7 +120,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {

  SpinYield sp;
  while (true) {
    state = AtomicAccess::load_acquire(&_state);
    state = _state.load_acquire();
    assert(decode_tag(state) == 0,
           "Pre arm: Should not be armed. "
           "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@ -134,7 +133,7 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {

  // Try to swing cell to armed. This should always succeed after the check above.
  int64_t new_state = encode(requested_tag, 0);
  int64_t prev_state = AtomicAccess::cmpxchg(&_state, state, new_state);
  int64_t prev_state = _state.compare_exchange(state, new_state);
  if (prev_state != state) {
    fatal("Cannot arm the wait barrier. "
          "Tag: " INT32_FORMAT "; Waiters: " INT32_FORMAT,
@ -145,14 +144,14 @@ void GenericWaitBarrier::Cell::arm(int32_t requested_tag) {
int GenericWaitBarrier::Cell::signal_if_needed(int max) {
  int signals = 0;
  while (true) {
    int cur = AtomicAccess::load_acquire(&_outstanding_wakeups);
    int cur = _outstanding_wakeups.load_acquire();
    if (cur == 0) {
      // All done, no more waiters.
      return 0;
    }
    assert(cur > 0, "Sanity");

    int prev = AtomicAccess::cmpxchg(&_outstanding_wakeups, cur, cur - 1);
    int prev = _outstanding_wakeups.compare_exchange(cur, cur - 1);
    if (prev != cur) {
      // Contention, return to caller for early return or backoff.
      return prev;
@ -172,7 +171,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
  int32_t waiters;

  while (true) {
    int64_t state = AtomicAccess::load_acquire(&_state);
    int64_t state = _state.load_acquire();
    int32_t tag = decode_tag(state);
    waiters = decode_waiters(state);

@ -182,7 +181,7 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
           tag, waiters);

    int64_t new_state = encode(0, waiters);
    if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
    if (_state.compare_exchange(state, new_state) == state) {
      // Successfully disarmed.
      break;
    }
@ -191,19 +190,19 @@ void GenericWaitBarrier::Cell::disarm(int32_t expected_tag) {
  // Wake up waiters, if we have at least one.
  // Allow other threads to assist with wakeups, if possible.
  if (waiters > 0) {
    AtomicAccess::release_store(&_outstanding_wakeups, waiters);
    _outstanding_wakeups.release_store(waiters);
    SpinYield sp;
    while (signal_if_needed(INT_MAX) > 0) {
      sp.wait();
    }
  }
  assert(AtomicAccess::load(&_outstanding_wakeups) == 0, "Post disarm: Should not have outstanding wakeups");
  assert(_outstanding_wakeups.load_relaxed() == 0, "Post disarm: Should not have outstanding wakeups");
}
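The encode/decode helpers pack a 32-bit tag and a 32-bit waiter count into one int64_t, so both fields can be updated by a single compare-exchange. A standalone sketch of the same scheme (std::atomic here; the real code uses the Atomic<int64_t> wrapper introduced by this patch):

#include <atomic>
#include <cstdint>
#include <cassert>

int64_t encode(int32_t tag, int32_t waiters) {
  return ((int64_t)(uint32_t)tag << 32) | (uint32_t)waiters;
}
int32_t decode_tag(int64_t s)     { return (int32_t)((uint64_t)s >> 32); }
int32_t decode_waiters(int64_t s) { return (int32_t)s; }

std::atomic<int64_t> state{0};

// Register one waiter, but only while the expected tag is still armed.
bool add_waiter(int32_t expected_tag) {
  while (true) {
    int64_t s = state.load(std::memory_order_acquire);
    if (decode_tag(s) != expected_tag) return false;   // tag changed
    int64_t ns = encode(expected_tag, decode_waiters(s) + 1);
    if (state.compare_exchange_weak(s, ns)) return true;
  }
}

int main() {
  state.store(encode(42, 0));
  assert(add_waiter(42) && decode_waiters(state.load()) == 1);
  assert(!add_waiter(7));
  return 0;
}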

void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
  // Try to register ourselves as pending waiter.
  while (true) {
    int64_t state = AtomicAccess::load_acquire(&_state);
    int64_t state = _state.load_acquire();
    int32_t tag = decode_tag(state);
    if (tag != expected_tag) {
      // Cell tag had changed while waiting here. This means either the cell had
@ -219,7 +218,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
           tag, waiters);

    int64_t new_state = encode(tag, waiters + 1);
    if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
    if (_state.compare_exchange(state, new_state) == state) {
      // Success! Proceed to wait.
      break;
    }
@ -238,7 +237,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {

  // Register ourselves as completed waiter before leaving.
  while (true) {
    int64_t state = AtomicAccess::load_acquire(&_state);
    int64_t state = _state.load_acquire();
    int32_t tag = decode_tag(state);
    int32_t waiters = decode_waiters(state);

@ -248,7 +247,7 @@ void GenericWaitBarrier::Cell::wait(int32_t expected_tag) {
           tag, waiters);

    int64_t new_state = encode(tag, waiters - 1);
    if (AtomicAccess::cmpxchg(&_state, state, new_state) == state) {
    if (_state.compare_exchange(state, new_state) == state) {
      // Success!
      break;
    }

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@

#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "runtime/atomic.hpp"
#include "runtime/semaphore.hpp"
#include "utilities/globalDefinitions.hpp"

@ -43,10 +44,10 @@ private:
  Semaphore _sem;

  // Cell state, tracks the arming + waiters status
  volatile int64_t _state;
  Atomic<int64_t> _state;

  // Wakeups to deliver for current waiters
  volatile int _outstanding_wakeups;
  Atomic<int> _outstanding_wakeups;

  int signal_if_needed(int max);

@ -83,7 +84,7 @@ private:
  // Trailing padding to protect the last cell.
  DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);

  volatile int _barrier_tag;
  Atomic<int> _barrier_tag;

  // Trailing padding to insulate the rest of the barrier from adjacent
  // data structures. The leading padding is not needed, as cell padding

@ -315,6 +315,18 @@ final class VirtualThread extends BaseVirtualThread {
        }
    }

    /**
     * Submits the given task to the given executor. If the scheduler is a
     * ForkJoinPool then the task is first adapted to a ForkJoinTask.
     */
    private void submit(Executor executor, Runnable task) {
        if (executor instanceof ForkJoinPool pool) {
            pool.submit(ForkJoinTask.adapt(task));
        } else {
            executor.execute(task);
        }
    }

    /**
     * Submits the runContinuation task to the scheduler. For the default scheduler,
     * and calling it on a worker thread, the task will be pushed to the local queue,
@ -335,12 +347,12 @@ final class VirtualThread extends BaseVirtualThread {
            if (currentThread().isVirtual()) {
                Continuation.pin();
                try {
                    scheduler.execute(runContinuation);
                    submit(scheduler, runContinuation);
                } finally {
                    Continuation.unpin();
                }
            } else {
                scheduler.execute(runContinuation);
                submit(scheduler, runContinuation);
            }
            done = true;
        } catch (RejectedExecutionException ree) {
@ -1536,4 +1548,4 @@ final class VirtualThread extends BaseVirtualThread {
        unblocker.setDaemon(true);
        unblocker.start();
    }
}
}

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -72,7 +72,7 @@ public interface TypeVariable<D extends GenericDeclaration> extends Type, Annota
    Type[] getBounds();

    /**
     * Returns the {@code GenericDeclaration} object representing the
     * Returns a {@code GenericDeclaration} object representing the
     * generic declaration declared for this type variable.
     *
     * @return the generic declaration declared for this type variable.

@ -203,7 +203,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          <i>(optional specific exception)</i>
     * @throws IOException
@ -340,7 +340,7 @@ public final class Files {
     *          if an unsupported open option is specified or the array contains
     *          attributes that cannot be set atomically when creating the file
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          and the file is being opened for writing <i>(optional specific
     *          exception)</i>
@ -377,7 +377,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported open option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          and the file is being opened for writing <i>(optional specific
     *          exception)</i>
@ -575,10 +575,11 @@ public final class Files {
        Set.of(StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE);

    /**
     * Creates a new and empty file, failing if the file already exists. The
     * check for the existence of the file and the creation of the new file if
     * it does not exist are a single operation that is atomic with respect to
     * all other filesystem activities that might affect the directory.
     * Creates a new and empty file, failing if {@code path} locates an existing
     * file. The check for the existence of the file and the creation of the new
     * file if it does not exist are a single operation that is atomic with
     * respect to all other filesystem activities that might affect the
     * directory.
     *
     * <p> The {@code attrs} parameter is optional {@link FileAttribute
     * file-attributes} to set atomically when creating the file. Each attribute
@ -598,7 +599,7 @@ public final class Files {
     *          if the array contains an attribute that cannot be set atomically
     *          when creating the file
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists
     *          if {@code path} locates an existing file
     *          <i>(optional specific exception)</i>
     * @throws IOException
     *          if an I/O error occurs or the parent directory does not exist
@ -611,7 +612,8 @@ public final class Files {
    }

    /**
     * Creates a new directory. The check for the existence of the file and the
     * Creates a new directory, failing if {@code dir} locates an existing
     * file. The check for the existence of the file and the
     * creation of the directory if it does not exist are a single operation
     * that is atomic with respect to all other filesystem activities that might
     * affect the directory. The {@link #createDirectories createDirectories}
@ -636,8 +638,8 @@ public final class Files {
     *          if the array contains an attribute that cannot be set atomically
     *          when creating the directory
     * @throws FileAlreadyExistsException
     *          if a directory could not otherwise be created because a file of
     *          that name already exists <i>(optional specific exception)</i>
     *          if {@code dir} locates an existing file
     *          <i>(optional specific exception)</i>
     * @throws IOException
     *          if an I/O error occurs or the parent directory does not exist
     */
@ -676,8 +678,8 @@ public final class Files {
     *          if the array contains an attribute that cannot be set atomically
     *          when creating the directory
     * @throws FileAlreadyExistsException
     *          if {@code dir} exists but is not a directory <i>(optional specific
     *          exception)</i>
     *          if {@code dir} locates an existing file that is not a directory
     *          <i>(optional specific exception)</i>
     * @throws IOException
     *          if an I/O error occurs
     */
@ -930,7 +932,8 @@ public final class Files {
    }

    /**
     * Creates a symbolic link to a target <i>(optional operation)</i>.
     * Creates a symbolic link to a target, failing if {@code link} locates an
     * existing file <i>(optional operation)</i>.
     *
     * <p> The {@code target} parameter is the target of the link. It may be an
     * {@link Path#isAbsolute absolute} or relative path and may not exist. When
@ -964,8 +967,8 @@ public final class Files {
     *          array contains an attribute that cannot be set atomically when
     *          creating the symbolic link
     * @throws FileAlreadyExistsException
     *          if a file with the name already exists <i>(optional specific
     *          exception)</i>
     *          if {@code link} locates an existing file
     *          <i>(optional specific exception)</i>
     * @throws IOException
     *          if an I/O error occurs
     */
@ -978,7 +981,8 @@ public final class Files {
    }

    /**
     * Creates a new link (directory entry) for an existing file <i>(optional
     * Creates a new link (directory entry) for an existing file,
     * failing if {@code link} locates an existing file <i>(optional
     * operation)</i>.
     *
     * <p> The {@code link} parameter locates the directory entry to create.
@ -1007,8 +1011,8 @@ public final class Files {
     *          if the implementation does not support adding an existing file
     *          to a directory
     * @throws FileAlreadyExistsException
     *          if the entry could not otherwise be created because a file of
     *          that name already exists <i>(optional specific exception)</i>
     *          if {@code link} locates an existing file
     *          <i>(optional specific exception)</i>
     * @throws IOException
     *          if an I/O error occurs
     */
@ -2711,7 +2715,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          <i>(optional specific exception)</i>
     *
@ -2754,7 +2758,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          <i>(optional specific exception)</i>
     *
@ -3161,7 +3165,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          <i>(optional specific exception)</i>
     */
@ -3222,7 +3226,7 @@ public final class Files {
     * @throws UnsupportedOperationException
     *          if an unsupported option is specified
     * @throws FileAlreadyExistsException
     *          If a file of that name already exists and the {@link
     *          If the path locates an existing file and the {@link
     *          StandardOpenOption#CREATE_NEW CREATE_NEW} option is specified
     *          <i>(optional specific exception)</i>
     */

@ -560,89 +560,70 @@ public class ForkJoinPool extends AbstractExecutorService
     * access (which is usually needed anyway).
     *
     * Signalling.  Signals (in signalWork) cause new or reactivated
     * workers to scan for tasks.  Method signalWork and its callers
     * try to approximate the unattainable goal of having the right
     * number of workers activated for the tasks at hand, but must err
     * on the side of too many workers vs too few to avoid stalls:
     * workers to scan for tasks. SignalWork is invoked in two cases:
     * (1) When a task is pushed onto an empty queue, and (2) When a
     * worker takes a top-level task from a queue that has additional
     * tasks. Together, these suffice in O(log(#threads)) steps to
     * fully activate with at least enough workers, and ideally no
     * more than required. This ideal is unobtainable: Callers do not
     * know whether another worker will finish its current task and
     * poll for others without need of a signal (which is otherwise an
     * advantage of work-stealing vs other schemes), and also must
     * conservatively estimate the triggering conditions of emptiness
     * or non-emptiness; all of which usually cause more activations
     * than necessary (see below). (Method signalWork is also used as
     * failsafe in case of Thread failures in deregisterWorker, to
     * activate or create a new worker to replace them).
     *
     * * If computations are purely tree structured, it suffices for
     *   every worker to activate another when it pushes a task into
     *   an empty queue, resulting in O(log(#threads)) steps to full
     *   activation. Emptiness must be conservatively approximated,
     *   which may result in unnecessary signals.  Also, to reduce
     *   resource usages in some cases, at the expense of slower
     *   startup in others, activation of an idle thread is preferred
     *   over creating a new one, here and elsewhere.
     *
     * * At the other extreme, if "flat" tasks (those that do not in
     *   turn generate others) come in serially from only a single
     *   producer, each worker taking a task from a queue should
     *   propagate a signal if there are more tasks in that
     *   queue. This is equivalent to, but generally faster than,
     *   arranging the stealer take multiple tasks, re-pushing one or
     *   more on its own queue, and signalling (because its queue is
     *   empty), also resulting in logarithmic full activation
     *   time. If tasks do not engage in unbounded loops based on
     *   the actions of other workers with unknown dependencies,
     *   this form of propagation can be limited to one signal per
     *   activation (phase change). We distinguish the cases by
     *   further signalling only if the task is an InterruptibleTask
     *   (see below), which are the only supported forms of task that
     *   may do so.
     *
     * * Because we don't know about usage patterns (or most commonly,
     *   mixtures), we use both approaches, which present even more
     *   opportunities to over-signal.  (Failure to distinguish these
     *   cases in terms of submission methods was arguably an early
     *   design mistake.)  Note that in either of these contexts,
     *   signals may be (and often are) unnecessary because active
     *   workers continue scanning after running tasks without the
     *   need to be signalled (which is one reason work stealing is
     *   often faster than alternatives), so additional workers
     *   aren't needed.
     *
     * * For rapidly branching tasks that require full pool resources,
     *   oversignalling is OK, because signalWork will soon have no
     *   more workers to create or reactivate. But for others (mainly
     *   externally submitted tasks), overprovisioning may cause very
     *   noticeable slowdowns due to contention and resource
     *   wastage. We reduce impact by deactivating workers when
     *   queues don't have accessible tasks, but reactivating and
     *   rescanning if other tasks remain.
     *
     * * Despite these, signal contention and overhead effects still
     *   occur during ramp-up and ramp-down of small computations.
     * Top-Level scheduling
     * ====================
     *
     * Scanning. Method runWorker performs top-level scanning for (and
     * execution of) tasks by polling a pseudo-random permutation of
     * the array (by starting at a given index, and using a constant
     * cyclically exhaustive stride.) It uses the same basic polling
     * method as WorkQueue.poll(), but restarts with a different
     * permutation on each invocation. The pseudorandom generator
     * need not have high-quality statistical properties in the long
     * permutation on each rescan. The pseudorandom generator need
     * not have high-quality statistical properties in the long
     * term. We use Marsaglia XorShifts, seeded with the Weyl sequence
     * from ThreadLocalRandom probes, which are cheap and
     * suffice. Each queue's polling attempts to avoid becoming stuck
     * when other scanners/pollers stall.  Scans do not otherwise
     * explicitly take into account core affinities, loads, cache
     * localities, etc. However, they do exploit temporal locality
     * (which usually approximates these) by preferring to re-poll
     * from the same queue after a successful poll before trying
     * others, which also reduces bookkeeping, cache traffic, and
     * scanning overhead. But it also reduces fairness, which is
     * partially counteracted by giving up on detected interference
     * (which also reduces contention when too many workers try to
     * take small tasks from the same queue).
     * from ThreadLocalRandom probes, which are cheap and suffice.
     *
     * Deactivation. When no tasks are found by a worker in runWorker,
     * it tries to deactivate()), giving up (and rescanning) on "ctl"
     * contention. To avoid missed signals during deactivation, the
     * method rescans and reactivates if there may have been a missed
     * signal during deactivation. To reduce false-alarm reactivations
     * while doing so, we scan multiple times (analogously to method
     * quiescent()) before trying to reactivate. Because idle workers
     * are often not yet blocked (parked), we use a WorkQueue field to
     * advertise that a waiter actually needs unparking upon signal.
     * it invokes deactivate, that first deactivates (to an IDLE
     * phase). Avoiding missed signals during deactivation requires a
     * (conservative) rescan, reactivating if there may be tasks to
     * poll. Because idle workers are often not yet blocked (parked),
     * we use a WorkQueue field to advertise that a waiter actually
     * needs unparking upon signal.
     *
     * When tasks are constructed as (recursive) DAGs, top-level
     * scanning is usually infrequent, and doesn't encounter most
     * of the following problems addressed by runWorker and awaitWork:
     *
     * Locality. Polls are organized into "runs", continuing until
     * empty or contended, while also minimizing interference by
     * postponing bookkeeping to ends of runs. This may reduce
     * fairness.
     *
     * Contention. When many workers try to poll few queues, they
     * often collide, generating CAS failures and disrupting locality
     * of workers already running their tasks. This also leads to
     * stalls when tasks cannot be taken because other workers have
     * not finished poll operations, which is detected by reading
     * ahead in queue arrays. In both cases, workers restart scans in a
     * way that approximates randomized backoff.
     *
     * Oversignalling. When many short top-level tasks are present in
     * a small number of queues, the above signalling strategy may
     * activate many more workers than needed, worsening locality and
     * contention problems, while also generating more global
     * contention (field ctl is CASed on every activation and
     * deactivation). We filter out (both in runWorker and
     * signalWork) attempted signals that are surely not needed
     * because the signalled tasks are already taken.
     *
     * Shutdown and Quiescence
     * =======================
     *
     * Quiescence. Workers scan looking for work, giving up when they
     * don't find any, without being sure that none are available.
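The Marsaglia XorShift generators mentioned in the scanning discussion are three shift-xor operations per step: cheap, full-period (over nonzero states), and good enough for picking scan start positions. A standalone sketch (in C++, for illustration only):

#include <cstdint>
#include <cassert>

uint32_t xorshift32(uint32_t x) {   // x must be nonzero
  x ^= x << 13;
  x ^= x >> 17;
  x ^= x << 5;
  return x;
}

int main() {
  uint32_t r = 2463534242u;         // any nonzero seed
  for (int i = 0; i < 4; i++) {
    r = xorshift32(r);
    assert(r != 0);                 // zero is the only fixed point, never reached
  }
  return 0;
}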
|
||||
@ -892,9 +873,7 @@ public class ForkJoinPool extends AbstractExecutorService
|
||||
* shutdown, runners are interrupted so they can cancel. Since
|
||||
* external joining callers never run these tasks, they must await
|
||||
* cancellation by others, which can occur along several different
|
||||
* paths. The inability to rely on caller-runs may also require
|
||||
* extra signalling (resulting in scanning and contention) so is
|
||||
* done only conditionally in methods push and runworker.
|
||||
* paths.
|
||||
*
|
||||
* Across these APIs, rules for reporting exceptions for tasks
|
||||
* with results accessed via join() differ from those via get(),
|
||||
@ -961,9 +940,13 @@ public class ForkJoinPool extends AbstractExecutorService
|
||||
* less-contended applications. To help arrange this, some
|
||||
* non-reference fields are declared as "long" even when ints or
|
||||
* shorts would suffice. For class WorkQueue, an
|
||||
* embedded @Contended region segregates fields most heavily
|
||||
* updated by owners from those most commonly read by stealers or
|
||||
* other management.
|
||||
* embedded @Contended isolates the very busy top index, along
|
||||
* with status and bookkeeping fields written (mostly) by owners,
|
||||
* that otherwise interfere with reading array and base
|
||||
* fields. There are other variables commonly contributing to
|
||||
* false-sharing-related performance issues (including fields of
|
||||
* class Thread), but we can't do much about this except try to
|
||||
* minimize access.
|
||||
*
|
||||
* Initial sizing and resizing of WorkQueue arrays is an even more
|
||||
* delicate tradeoff because the best strategy systematically
|
||||
@ -972,13 +955,11 @@ public class ForkJoinPool extends AbstractExecutorService
|
||||
* direct false-sharing and indirect cases due to GC bookkeeping
|
||||
* (cardmarks etc), and reduce the number of resizes, which are
|
||||
* not especially fast because they require atomic transfers.
|
||||
* Currently, arrays for workers are initialized to be just large
|
||||
* enough to avoid resizing in most tree-structured tasks, but
|
||||
* larger for external queues where both false-sharing problems
|
||||
* and the need for resizing are more common. (Maintenance note:
|
||||
* any changes in fields, queues, or their uses, or JVM layout
|
||||
* policies, must be accompanied by re-evaluation of these
|
||||
* placement and sizing decisions.)
|
||||
* Currently, arrays are initialized to be just large enough to
|
||||
* avoid resizing in most tree-structured tasks, but grow rapidly
|
||||
* until large. (Maintenance note: any changes in fields, queues,
|
||||
* or their uses, or JVM layout policies, must be accompanied by
|
||||
* re-evaluation of these placement and sizing decisions.)
|
||||
*
|
||||
* Style notes
|
||||
* ===========
|
||||
@ -1061,17 +1042,11 @@ public class ForkJoinPool extends AbstractExecutorService
|
||||
static final int DEFAULT_COMMON_MAX_SPARES = 256;
|
||||
|
||||
/**
|
||||
* Initial capacity of work-stealing queue array for workers.
|
||||
* Initial capacity of work-stealing queue array.
|
||||
* Must be a power of two, at least 2. See above.
|
||||
*/
|
||||
static final int INITIAL_QUEUE_CAPACITY = 1 << 6;
|
||||
|
||||
/**
|
||||
* Initial capacity of work-stealing queue array for external queues.
|
||||
* Must be a power of two, at least 2. See above.
|
||||
*/
|
||||
static final int INITIAL_EXTERNAL_QUEUE_CAPACITY = 1 << 9;
|
||||
|
||||
// conversions among short, int, long
|
||||
static final int SMASK = 0xffff; // (unsigned) short bits
|
||||
static final long LMASK = 0xffffffffL; // lower 32 bits of long
|
||||
@ -1211,11 +1186,11 @@ public class ForkJoinPool extends AbstractExecutorService
|
||||
@jdk.internal.vm.annotation.Contended("w")
|
||||
int stackPred; // pool stack (ctl) predecessor link
|
||||
@jdk.internal.vm.annotation.Contended("w")
|
||||
volatile int parking; // nonzero if parked in awaitWork
|
||||
@jdk.internal.vm.annotation.Contended("w")
|
||||
volatile int source; // source queue id (or DROPPED)
|
||||
@jdk.internal.vm.annotation.Contended("w")
|
||||
int nsteals; // number of steals from other queues
|
||||
@jdk.internal.vm.annotation.Contended("w")
|
||||
volatile int parking; // nonzero if parked in awaitWork
|
||||
|
||||
// Support for atomic operations
|
||||
private static final Unsafe U;
|
||||
@ -1248,11 +1223,11 @@ public class ForkJoinPool extends AbstractExecutorService
         */
        WorkQueue(ForkJoinWorkerThread owner, int id, int cfg,
                  boolean clearThreadLocals) {
            array = new ForkJoinTask<?>[owner == null ?
                                        INITIAL_EXTERNAL_QUEUE_CAPACITY :
                                        INITIAL_QUEUE_CAPACITY];
            this.owner = owner;
            this.config = (clearThreadLocals) ? cfg | CLEAR_TLS : cfg;
            if ((this.owner = owner) == null) {
                array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
                phase = id | IDLE;
            }
        }

        /**
@ -1279,27 +1254,27 @@ public class ForkJoinPool extends AbstractExecutorService
         * @throws RejectedExecutionException if array could not be resized
         */
        final void push(ForkJoinTask<?> task, ForkJoinPool pool, boolean internal) {
            int s = top, b = base, m, cap, room; ForkJoinTask<?>[] a;
            if ((a = array) != null && (cap = a.length) > 0 && // else disabled
                task != null) {
                int pk = task.noUserHelp() + 1; // prev slot offset
                if ((room = (m = cap - 1) - (s - b)) >= 0) {
            int s = top, b = base, m, cap, room; ForkJoinTask<?>[] a, na;
            if ((a = array) != null && (cap = a.length) > 0) { // else disabled
                int k = (m = cap - 1) & s;
                if ((room = m - (s - b)) >= 0) {
                    top = s + 1;
                    long pos = slotOffset(m & s);
                    long pos = slotOffset(k);
                    if (!internal)
                        U.putReference(a, pos, task); // inside lock
                    else
                        U.getAndSetReference(a, pos, task); // fully fenced
                    if (room == 0) // resize
                        growArray(a, cap, s);
                    if (room == 0 && (na = growArray(a, cap, s)) != null)
                        k = ((a = na).length - 1) & s; // resize
                }
                if (!internal)
                    unlockPhase();
                if (room < 0)
                    throw new RejectedExecutionException("Queue capacity exceeded");
                if ((room == 0 || a[m & (s - pk)] == null) &&
                    pool != null)
                    pool.signalWork(); // may have appeared empty
                if (pool != null &&
                    (room == 0 ||
                     U.getReferenceAcquire(a, slotOffset(m & (s - 1))) == null))
                    pool.signalWork(a, k); // may have appeared empty
            }
        }

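A stripped-down, single-producer analogue of the push-then-signal heuristic above (a sketch under assumed simplifications, not the JDK's implementation): signal a consumer only when the queue may have looked empty to a concurrent scanner, i.e. when the slot preceding the pushed one was still null.

    import java.util.concurrent.atomic.AtomicReferenceArray;

    final class MiniWorkQueue<T> {
        final AtomicReferenceArray<T> slots =
            new AtomicReferenceArray<>(64);     // capacity: power of two
        int top;                                // written only by the owner

        void push(T task, Runnable signal) {
            int s = top, m = slots.length() - 1;
            slots.set(m & s, task);             // volatile publish of the task
            top = s + 1;
            if (slots.get(m & (s - 1)) == null) // previous slot empty:
                signal.run();                   // queue may have appeared empty
        }
    }
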
@ -1308,11 +1283,12 @@ public class ForkJoinPool extends AbstractExecutorService
         * @param a old array
         * @param cap old array capacity
         * @param s current top
         * @return new array, or null on failure
         */
        private void growArray(ForkJoinTask<?>[] a, int cap, int s) {
            int newCap = cap << 1;
        private ForkJoinTask<?>[] growArray(ForkJoinTask<?>[] a, int cap, int s) {
            int newCap = (cap >= 1 << 16) ? cap << 1 : cap << 2;
            ForkJoinTask<?>[] newArray = null;
            if (a != null && a.length == cap && cap > 0 && newCap > 0) {
                ForkJoinTask<?>[] newArray = null;
                try {
                    newArray = new ForkJoinTask<?>[newCap];
                } catch (OutOfMemoryError ex) {
@ -1329,34 +1305,45 @@ public class ForkJoinPool extends AbstractExecutorService
                    updateArray(newArray); // fully fenced
                }
            }
            return newArray;
        }

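The new growth rule quadruples small arrays and doubles large ones, which is what the overview comment means by "grow rapidly until large". A quick standalone check of the schedule it implies, starting from INITIAL_QUEUE_CAPACITY:

    public class GrowthDemo {
        public static void main(String[] args) {
            int cap = 1 << 6;                   // INITIAL_QUEUE_CAPACITY
            while (cap < (1 << 19)) {
                System.out.print(cap + " ");
                cap = (cap >= 1 << 16) ? cap << 1 : cap << 2;  // rule from growArray
            }
            // prints: 64 256 1024 4096 16384 65536 131072 262144
        }
    }
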
        /**
         * Takes next task, if one exists, in order specified by mode,
         * so acts as either local-pop or local-poll. Called only by owner.
         * @param fifo nonzero if FIFO mode
         * Takes next task, if one exists, in lifo order.
         */
        private ForkJoinTask<?> nextLocalTask(int fifo) {
        private ForkJoinTask<?> localPop() {
            ForkJoinTask<?> t = null;
            ForkJoinTask<?>[] a = array;
            int b = base, p = top, cap;
            if (p - b > 0 && a != null && (cap = a.length) > 0) {
                for (int m = cap - 1, s, nb;;) {
                    if (fifo == 0 || (nb = b + 1) == p) {
                        if ((t = (ForkJoinTask<?>)U.getAndSetReference(
                                 a, slotOffset(m & (s = p - 1)), null)) != null)
                            updateTop(s); // else lost race for only task
                        break;
            int s = top - 1, cap; long k; ForkJoinTask<?>[] a;
            if ((a = array) != null && (cap = a.length) > 0 &&
                U.getReference(a, k = slotOffset((cap - 1) & s)) != null &&
                (t = (ForkJoinTask<?>)U.getAndSetReference(a, k, null)) != null)
                updateTop(s);
            return t;
        }

        /**
         * Takes next task, if one exists, in fifo order.
         */
        private ForkJoinTask<?> localPoll() {
            ForkJoinTask<?> t = null;
            int p = top, cap; ForkJoinTask<?>[] a;
            if ((a = array) != null && (cap = a.length) > 0) {
                for (int b = base; p - b > 0; ) {
                    int nb = b + 1;
                    long k = slotOffset((cap - 1) & b);
                    if (U.getReference(a, k) == null) {
                        if (nb == p)
                            break; // else base is lagging
                        while (b == (b = U.getIntAcquire(this, BASE)))
                            Thread.onSpinWait(); // spin to reduce memory traffic
                    }
                    if ((t = (ForkJoinTask<?>)U.getAndSetReference(
                             a, slotOffset(m & b), null)) != null) {
                    else if ((t = (ForkJoinTask<?>)
                              U.getAndSetReference(a, k, null)) != null) {
                        updateBase(nb);
                        break;
                    }
                    while (b == (b = U.getIntAcquire(this, BASE)))
                        Thread.onSpinWait(); // spin to reduce memory traffic
                    if (p - b <= 0)
                        break;
                    else
                        b = base;
                }
            }
            return t;
@ -1364,10 +1351,9 @@ public class ForkJoinPool extends AbstractExecutorService

        /**
         * Takes next task, if one exists, using configured mode.
         * (Always internal, never called for Common pool.)
         */
        final ForkJoinTask<?> nextLocalTask() {
            return nextLocalTask(config & FIFO);
            return (config & FIFO) == 0 ? localPop() : localPoll();
        }

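The LIFO/FIFO split is fixed per pool by the public asyncMode flag, which sets the FIFO bit consulted here. Usage example (public constructor API):

    import java.util.concurrent.ForkJoinPool;

    public class ModeDemo {
        public static void main(String[] args) {
            ForkJoinPool lifo = new ForkJoinPool(4);  // default: workers pop (LIFO)
            ForkJoinPool fifo = new ForkJoinPool(     // asyncMode: workers poll (FIFO)
                4, ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true);
            System.out.println(fifo.getAsyncMode()); // true
            lifo.shutdown();
            fifo.shutdown();
        }
    }
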
        /**
@ -1443,12 +1429,12 @@ public class ForkJoinPool extends AbstractExecutorService
        // specialized execution methods

        /**
         * Runs the given task, as well as remaining local tasks.
         * Runs the given task, as well as remaining local tasks
         */
        final void topLevelExec(ForkJoinTask<?> task, int fifo) {
            while (task != null) {
                task.doExec();
                task = nextLocalTask(fifo);
                task = (fifo != 0) ? localPoll() : localPop();
            }
        }

@ -1578,7 +1564,7 @@ public class ForkJoinPool extends AbstractExecutorService
         * Cancels all local tasks. Called only by owner.
         */
        final void cancelTasks() {
            for (ForkJoinTask<?> t; (t = nextLocalTask(0)) != null; ) {
            for (ForkJoinTask<?> t; (t = localPop()) != null; ) {
                try {
                    t.cancel(false);
                } catch (Throwable ignore) {
@ -1780,7 +1766,8 @@ public class ForkJoinPool extends AbstractExecutorService
     * @param w caller's WorkQueue
     */
    final void registerWorker(WorkQueue w) {
        if (w != null && (runState & STOP) == 0L) {
        if (w != null) {
            w.array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
            ThreadLocalRandom.localInit();
            int seed = w.stackPred = ThreadLocalRandom.getProbe();
            int phaseSeq = seed & ~((IDLE << 1) - 1); // initial phase tag
@ -1858,17 +1845,18 @@ public class ForkJoinPool extends AbstractExecutorService
        }
        if ((tryTerminate(false, false) & STOP) == 0L &&
            phase != 0 && w != null && w.source != DROPPED) {
            signalWork(); // possibly replace
            w.cancelTasks(); // clean queue
            signalWork(null, 0); // possibly replace
        }
        if (ex != null)
            ForkJoinTask.rethrow(ex);
    }

    /**
     * Releases an idle worker, or creates one if not enough exist.
     * Releases an idle worker, or creates one if not enough exist,
     * giving up if array a is nonnull and task at a[k] already taken.
     */
    final void signalWork() {
    final void signalWork(ForkJoinTask<?>[] a, int k) {
        int pc = parallelism;
        for (long c = ctl;;) {
            WorkQueue[] qs = queues;
@ -1884,13 +1872,15 @@ public class ForkJoinPool extends AbstractExecutorService
            if (sp == 0) {
                if ((short)(c >>> TC_SHIFT) >= pc)
                    break;
                nc = ((c + TC_UNIT) & TC_MASK);
                nc = ((c + TC_UNIT) & TC_MASK) | ac;
            }
            else if ((v = w) == null)
                break;
            else
                nc = (v.stackPred & LMASK) | (c & TC_MASK);
            if (c == (c = compareAndExchangeCtl(c, nc | ac))) {
                nc = (v.stackPred & LMASK) | (c & TC_MASK) | ac;
            if (a != null && k < a.length && k >= 0 && a[k] == null)
                break;
            if (c == (c = ctl) && c == (c = compareAndExchangeCtl(c, nc))) {
                if (v == null)
                    createWorker();
                else {
@ -1973,178 +1963,196 @@ public class ForkJoinPool extends AbstractExecutorService
     * @param w caller's WorkQueue (may be null on failed initialization)
     */
    final void runWorker(WorkQueue w) {
        if (w != null) {
            int phase = w.phase, r = w.stackPred; // seed from registerWorker
            int fifo = w.config & FIFO, nsteals = 0, src = -1;
            for (;;) {
                WorkQueue[] qs;
        if (w != null && w.phase != 0) { // else unregistered
            WorkQueue[] qs;
            int r = w.stackPred; // seed from registerWorker
            int fifo = (int)config & FIFO, rescans = 0, inactive = 0, taken = 0, n;
            while ((runState & STOP) == 0L && (qs = queues) != null &&
                   (n = qs.length) > 0) {
                int i = r, step = (r >>> 16) | 1;
                r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
                if ((runState & STOP) != 0L || (qs = queues) == null)
                    break;
                int n = qs.length, i = r, step = (r >>> 16) | 1;
                boolean rescan = false;
                scan: for (int l = n; l > 0; --l, i += step) { // scan queues
                    int j, cap; WorkQueue q; ForkJoinTask<?>[] a;
                    if ((q = qs[j = i & (n - 1)]) != null &&
                        (a = q.array) != null && (cap = a.length) > 0) {
                        for (int m = cap - 1, pb = -1, b = q.base;;) {
                            ForkJoinTask<?> t; long k;
                scan: for (int j = n; j != 0; --j, i += step) {
                    WorkQueue q; int qid;
                    if ((q = qs[qid = i & (n - 1)]) != null) {
                        ForkJoinTask<?>[] a; int cap; // poll queue
                        while ((a = q.array) != null && (cap = a.length) > 0) {
                            int b, nb, nk; long bp; ForkJoinTask<?> t;
                            t = (ForkJoinTask<?>)U.getReferenceAcquire(
                                a, k = slotOffset(m & b));
                            if (b != (b = q.base) || t == null ||
                                !U.compareAndSetReference(a, k, t, null)) {
                                if (a[b & m] == null) {
                                    if (rescan) // end of run
                                        break scan;
                                    if (a[(b + 1) & m] == null &&
                                        a[(b + 2) & m] == null) {
                                        break; // probably empty
                                a, bp = slotOffset((cap - 1) & (b = q.base)));
                            long np = slotOffset(nk = (nb = b + 1) & (cap - 1));
                            if (q.base == b) { // else inconsistent
                                if (t == null) {
                                    if (q.array == a) { // else resized
                                        if (rescans > 0) // ran or stalled
                                            break scan;
                                        if (U.getReference(a, np) == null &&
                                            (rescans >= 0 ||
                                             (U.getReferenceAcquire(a, bp) == null &&
                                              q.top == q.base)))
                                            break;
                                        rescans = 1; // may be stalled
                                    }
                                    if (pb == (pb = b)) { // track progress
                                        rescan = true; // stalled; reorder scan
                                    }
                                    else if (inactive != 0) {
                                        if ((inactive = tryReactivate(w)) != 0) {
                                            rescans = 1; // can't take yet
                                            break scan;
                                        }
                                    }
                                }
                                else {
                                    boolean propagate;
                                    int nb = q.base = b + 1, prevSrc = src;
                                    w.nsteals = ++nsteals;
                                    w.source = src = j; // volatile
                                    rescan = true;
                                    int nh = t.noUserHelp();
                                    if (propagate =
                                        (prevSrc != src || nh != 0) && a[nb & m] != null)
                                        signalWork();
                                    w.topLevelExec(t, fifo);
                                    if ((b = q.base) != nb && !propagate)
                                        break scan; // reduce interference
                                else if (U.compareAndSetReference(a, bp, t, null)) {
                                    q.base = nb;
                                    Object nt = U.getReferenceAcquire(a, np);
                                    w.source = qid;
                                    rescans = 1;
                                    ++taken;
                                    if (nt != null && // confirm a[nk]
                                        U.getReferenceAcquire(a, np) == nt)
                                        signalWork(a, nk); // propagate
                                    w.topLevelExec(t, fifo);
                                }
                            }
                        }
                    }
                }
                if (!rescan) {
                    if (((phase = deactivate(w, phase)) & IDLE) != 0)
                        break;
                    src = -1; // re-enable propagation
                if (rescans >= 0)
                    --rescans;
                else if (inactive == 0) {
                    if ((inactive = deactivate(w, taken)) != 0)
                        taken = 0;
                }
                else if (awaitWork(w) == 0)
                    inactive = rescans = 0;
                else
                    break;
            }
        }
    }

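The scan order relies on a small number-theory fact: the queue table length n is a power of two and step is forced odd, so step is coprime to n and the arithmetic progression i, i+step, ... visits every slot exactly once per pass. A standalone check:

    import java.util.BitSet;

    public class ScanDemo {
        public static void main(String[] args) {
            int r = 0x2545F491;                          // arbitrary nonzero seed
            r ^= r << 13; r ^= r >>> 17; r ^= r << 5;    // xorshift, as in runWorker
            int n = 16, i = r, step = (r >>> 16) | 1;    // odd step, n a power of two
            BitSet seen = new BitSet(n);
            for (int j = n; j != 0; --j, i += step)
                seen.set(i & (n - 1));
            System.out.println(seen.cardinality() == n); // true: full coverage
        }
    }
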
    /**
     * Deactivates and if necessary awaits signal or termination.
     * Tries to deactivate worker, keeping active on contention
     *
     * @param w the worker
     * @param phase current phase
     * @return current phase, with IDLE set if worker should exit
     * @param w the work queue
     * @param taken number of stolen tasks since last deactivation
     * @return nonzero if inactive
     */
    private int deactivate(WorkQueue w, int phase) {
        if (w == null) // currently impossible
            return IDLE;
        int p = phase | IDLE, activePhase = phase + (IDLE << 1);
        long pc = ctl, qc = (activePhase & LMASK) | ((pc - RC_UNIT) & UMASK);
        int sp = w.stackPred = (int)pc; // set ctl stack link
        w.phase = p;
        if (!compareAndSetCtl(pc, qc)) // try to enqueue
            return w.phase = phase; // back out on possible signal
        int ac = (short)(qc >>> RC_SHIFT), n; long e; WorkQueue[] qs;
        if (((e = runState) & STOP) != 0L ||
            ((e & SHUTDOWN) != 0L && ac == 0 && quiescent() > 0) ||
            (qs = queues) == null || (n = qs.length) <= 0)
            return IDLE; // terminating

        for (int prechecks = Math.min(ac, 2), // reactivation threshold
             k = Math.max(n + (n << 1), SPIN_WAITS << 1);;) {
            WorkQueue q; int cap; ForkJoinTask<?>[] a; long c;
            if (w.phase == activePhase)
                return activePhase;
            if (--k < 0)
                return awaitWork(w, p); // block, drop, or exit
            if ((q = qs[k & (n - 1)]) == null)
                Thread.onSpinWait();
            else if ((a = q.array) != null && (cap = a.length) > 0 &&
                     a[q.base & (cap - 1)] != null && --prechecks < 0 &&
                     (int)(c = ctl) == activePhase &&
                     compareAndSetCtl(c, (sp & LMASK) | ((c + RC_UNIT) & UMASK)))
                return w.phase = activePhase; // reactivate
    private int deactivate(WorkQueue w, int taken) {
        int inactive = 0, phase;
        if (w != null && (inactive = (phase = w.phase) & IDLE) == 0) {
            long sp = (phase + (IDLE << 1)) & LMASK, pc, c;
            w.phase = phase | IDLE;
            w.stackPred = (int)(pc = ctl); // set ctl stack link
            if (!compareAndSetCtl( // try to enqueue
                    pc, c = ((pc - RC_UNIT) & UMASK) | sp))
                w.phase = phase; // back out on contention
            else {
                if (taken != 0) {
                    w.nsteals += taken;
                    if ((w.config & CLEAR_TLS) != 0 &&
                        (Thread.currentThread() instanceof ForkJoinWorkerThread f))
                        f.resetThreadLocals(); // (instanceof check always true)
                }
                if (((c & RC_MASK) == 0L && quiescent() > 0) || taken == 0)
                    inactive = w.phase & IDLE; // check quiescent termination
                else { // spin for approx 1 scan cost
                    int tc = (short)(c >>> TC_SHIFT);
                    int spins = Math.max((tc << 1) + tc, SPIN_WAITS);
                    while ((inactive = w.phase & IDLE) != 0 && --spins != 0)
                        Thread.onSpinWait();
                }
            }
        }
        return inactive;
    }

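Idle workers form a Treiber stack threaded through the ctl word: the low half of ctl names the top waiter, each waiter keeps its predecessor in stackPred, and a single CAS pushes or pops. A simplified analogue with a made-up field layout (not the real ctl encoding, which also packs release and total counts into the high bits):

    import java.util.concurrent.atomic.AtomicLong;

    final class IdleStack {
        final AtomicLong ctl = new AtomicLong();   // low 32 bits: top waiter id
        final int[] stackPred = new int[64];       // per-worker predecessor link

        boolean tryPush(int workerId) {            // deactivate: one CAS
            long c = ctl.get();
            stackPred[workerId] = (int) c;         // remember previous top
            long nc = (c & ~0xffffffffL) | (workerId & 0xffffffffL);
            return ctl.compareAndSet(c, nc);       // on failure, stay active
        }

        int tryPop() {                             // signal: release top waiter
            long c = ctl.get();
            int top = (int) c;
            if (top == 0)
                return 0;                          // empty (id 0 reserved)
            long nc = (c & ~0xffffffffL) | (stackPred[top] & 0xffffffffL);
            return ctl.compareAndSet(c, nc) ? top : 0;
        }
    }
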
    /**
     * Reactivates worker w if it is currently top of ctl stack
     *
     * @param w the work queue
     * @return 0 if now active
     */
    private int tryReactivate(WorkQueue w) {
        int inactive = 0;
        if (w != null) { // always true; hoist checks
            int sp = w.stackPred, phase, activePhase; long c;
            if ((inactive = (phase = w.phase) & IDLE) != 0 &&
                (int)(c = ctl) == (activePhase = phase + IDLE) &&
                compareAndSetCtl(c, (sp & LMASK) | ((c + RC_UNIT) & UMASK))) {
                w.phase = activePhase;
                inactive = 0;
            }
        }
        return inactive;
    }

    /**
     * Awaits signal or termination.
     *
     * @param w the work queue
     * @param p current phase (known to be idle)
     * @return current phase, with IDLE set if worker should exit
     * @return 0 if now active
     */
    private int awaitWork(WorkQueue w, int p) {
        if (w != null) {
            ForkJoinWorkerThread t; long deadline;
            if ((w.config & CLEAR_TLS) != 0 && (t = w.owner) != null)
                t.resetThreadLocals(); // clear before reactivate
            if ((ctl & RC_MASK) > 0L)
                deadline = 0L;
            else if ((deadline =
                      (((w.source != INVALID_ID) ? keepAlive : TIMEOUT_SLOP)) +
                      System.currentTimeMillis()) == 0L)
                deadline = 1L; // avoid zero
            int activePhase = p + IDLE;
            if ((p = w.phase) != activePhase && (runState & STOP) == 0L) {
    private int awaitWork(WorkQueue w) {
        int inactive = 0, phase;
        if (w != null) { // always true; hoist checks
            long waitTime = (w.source == INVALID_ID) ? 0L : keepAlive;
            if ((inactive = (phase = w.phase) & IDLE) != 0) {
                LockSupport.setCurrentBlocker(this);
                w.parking = 1; // enable unpark
                while ((p = w.phase) != activePhase) {
                    boolean trimmable = false; int trim;
                    Thread.interrupted(); // clear status
                int activePhase = phase + IDLE;
                for (long deadline = 0L;;) {
                    Thread.interrupted(); // clear status
                    if ((runState & STOP) != 0L)
                        break;
                    if (deadline != 0L) {
                        if ((trim = tryTrim(w, p, deadline)) > 0)
                            break;
                        else if (trim < 0)
                            deadline = 0L;
                        else
                            trimmable = true;
                    boolean trimmable = false; // use timed wait if trimmable
                    long d = 0L, c;
                    if (((c = ctl) & RC_MASK) == 0L && (int)c == activePhase) {
                        long now = System.currentTimeMillis();
                        if (deadline == 0L)
                            deadline = waitTime + now;
                        if (deadline - now <= TIMEOUT_SLOP) {
                            if (tryTrim(w, c, activePhase))
                                break;
                            continue; // lost race to trim
                        }
                        d = deadline;
                        trimmable = true;
                    }
                    U.park(trimmable, deadline);
                    w.parking = 1; // enable unpark and recheck
                    if ((inactive = w.phase & IDLE) != 0)
                        U.park(trimmable, d);
                    w.parking = 0; // close unpark window
                    if (inactive == 0 || (inactive = w.phase & IDLE) == 0)
                        break;
                }
                w.parking = 0;
                LockSupport.setCurrentBlocker(null);
            }
        }
        return p;
        return inactive;
    }

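The park loop follows a standard idiom: publish intent to sleep (parking = 1), re-check the condition inside the window, park against an absolute deadline, then close the window (parking = 0) so a racing unpark is never lost. A simplified version with LockSupport (the JDK calls Unsafe.park directly; the fields here are illustrative stand-ins):

    import java.util.concurrent.locks.LockSupport;

    final class Parker {
        volatile int parking;                       // nonzero while prepared to sleep
        volatile boolean signalled;                 // stand-in for the phase check

        void await(long keepAliveMillis) {
            long deadline = System.currentTimeMillis() + keepAliveMillis;
            while (!signalled) {
                Thread.interrupted();               // clear status; interrupts must not wake us
                parking = 1;                        // enable unpark
                if (!signalled)                     // recheck inside the window
                    LockSupport.parkUntil(this, deadline);
                parking = 0;                        // close unpark window
                if (System.currentTimeMillis() >= deadline)
                    break;                          // timed out: candidate for trimming
            }
        }

        void signal(Thread owner) {
            signalled = true;
            if (parking != 0)                       // unpark only if possibly asleep
                LockSupport.unpark(owner);
        }
    }
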
    /**
     * Tries to remove and deregister worker after timeout, and release
     * another to do the same.
     * @return > 0: trimmed, < 0 : not trimmable, else 0
     * another to do the same unless new tasks are found.
     */
    private int tryTrim(WorkQueue w, int phase, long deadline) {
        long c, nc; int stat, activePhase, vp, i; WorkQueue[] vs; WorkQueue v;
        if ((activePhase = phase + IDLE) != (int)(c = ctl) || w == null)
            stat = -1; // no longer ctl top
        else if (deadline - System.currentTimeMillis() >= TIMEOUT_SLOP)
            stat = 0; // spurious wakeup
        else if (!compareAndSetCtl(
                     c, nc = ((w.stackPred & LMASK) | (RC_MASK & c) |
                              (TC_MASK & (c - TC_UNIT)))))
            stat = -1; // lost race to signaller
        else {
            stat = 1;
            w.source = DROPPED;
            w.phase = activePhase;
            if ((vp = (int)nc) != 0 && (vs = queues) != null &&
                vs.length > (i = vp & SMASK) && (v = vs[i]) != null &&
                compareAndSetCtl( // try to wake up next waiter
                    nc, ((UMASK & (nc + RC_UNIT)) |
                         (nc & TC_MASK) | (v.stackPred & LMASK)))) {
                v.source = INVALID_ID; // enable cascaded timeouts
                v.phase = vp;
                U.unpark(v.owner);
    private boolean tryTrim(WorkQueue w, long c, int activePhase) {
        if (w != null) {
            int vp, i; WorkQueue[] vs; WorkQueue v;
            long nc = ((w.stackPred & LMASK) |
                       ((RC_MASK & c) | (TC_MASK & (c - TC_UNIT))));
            if (compareAndSetCtl(c, nc)) {
                w.source = DROPPED;
                w.phase = activePhase;
                if ((vp = (int)nc) != 0 && (vs = queues) != null &&
                    vs.length > (i = vp & SMASK) && (v = vs[i]) != null &&
                    compareAndSetCtl( // try to wake up next waiter
                        nc, ((v.stackPred & LMASK) |
                             ((UMASK & (nc + RC_UNIT)) | (nc & TC_MASK))))) {
                    v.source = INVALID_ID; // enable cascaded timeouts
                    v.phase = vp;
                    U.unpark(v.owner);
                }
                return true;
            }
        }
        return stat;
        return false;
    }

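Trimming is driven by the pool's keepAlive, settable through the long-form constructor (public since Java 9); a timed-out worker drops itself and then cascades the timeout to the next waiter. A configuration example (argument values are illustrative):

    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.TimeUnit;

    public class TrimDemo {
        public static void main(String[] args) {
            ForkJoinPool pool = new ForkJoinPool(
                8,                                              // parallelism
                ForkJoinPool.defaultForkJoinWorkerThreadFactory,
                null,                                           // no uncaught handler
                false,                                          // asyncMode
                8,                                              // corePoolSize
                256,                                            // maximumPoolSize
                1,                                              // minimumRunnable
                null,                                           // saturate predicate
                30, TimeUnit.SECONDS);                          // keepAliveTime
            pool.shutdown();
        }
    }
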
    /**
@ -2561,52 +2569,35 @@ public class ForkJoinPool extends AbstractExecutorService

    /**
     * Finds and locks a WorkQueue for an external submitter, or
     * throws RejectedExecutionException if shutdown or terminating.
     * @param r current ThreadLocalRandom.getProbe() value
     * throws RejectedExecutionException if shutdown
     * @param rejectOnShutdown true if RejectedExecutionException
     * should be thrown when shutdown (else only if terminating)
     * should be thrown when shutdown
     */
    private WorkQueue submissionQueue(int r, boolean rejectOnShutdown) {
        int reuse; // nonzero if prefer create
        if ((reuse = r) == 0) {
            ThreadLocalRandom.localInit(); // initialize caller's probe
    final WorkQueue externalSubmissionQueue(boolean rejectOnShutdown) {
        int r;
        if ((r = ThreadLocalRandom.getProbe()) == 0) {
            ThreadLocalRandom.localInit(); // initialize caller's probe
            r = ThreadLocalRandom.getProbe();
        }
        for (int probes = 0; ; ++probes) {
            int n, i, id; WorkQueue[] qs; WorkQueue q;
            if ((qs = queues) == null)
                break;
            if ((n = qs.length) <= 0)
        for (;;) {
            WorkQueue q; WorkQueue[] qs; int n, id, i;
            if ((qs = queues) == null || (n = qs.length) <= 0)
                break;
            if ((q = qs[i = (id = r & EXTERNAL_ID_MASK) & (n - 1)]) == null) {
                WorkQueue w = new WorkQueue(null, id, 0, false);
                w.phase = id;
                boolean reject = ((lockRunState() & SHUTDOWN) != 0 &&
                                  rejectOnShutdown);
                if (!reject && queues == qs && qs[i] == null)
                    q = qs[i] = w; // else lost race to install
                WorkQueue newq = new WorkQueue(null, id, 0, false);
                lockRunState();
                if (qs[i] == null && queues == qs)
                    q = qs[i] = newq; // else lost race to install
                unlockRunState();
                if (q != null)
                    return q;
                if (reject)
            }
            if (q != null && q.tryLockPhase()) {
                if (rejectOnShutdown && (runState & SHUTDOWN) != 0L) {
                    q.unlockPhase(); // check while q lock held
                    break;
                reuse = 0;
            }
            if (reuse == 0 || !q.tryLockPhase()) { // move index
                if (reuse == 0) {
                    if (probes >= n >> 1)
                        reuse = r; // stop prefering free slot
                }
                else if (q != null)
                    reuse = 0; // probe on collision
                r = ThreadLocalRandom.advanceProbe(r);
            }
            else if (rejectOnShutdown && (runState & SHUTDOWN) != 0L) {
                q.unlockPhase(); // check while q lock held
                break;
            }
                else
                    return q;
            }
            r = ThreadLocalRandom.advanceProbe(r); // move
        }
        throw new RejectedExecutionException();
    }
@ -2620,24 +2611,12 @@ public class ForkJoinPool extends AbstractExecutorService
        }
        else { // find and lock queue
            internal = false;
            q = submissionQueue(ThreadLocalRandom.getProbe(), true);
            q = externalSubmissionQueue(true);
        }
        q.push(task, signalIfEmpty ? this : null, internal);
        return task;
    }

    /**
     * Returns queue for an external submission, bypassing call to
     * submissionQueue if already established and unlocked.
     */
    final WorkQueue externalSubmissionQueue(boolean rejectOnShutdown) {
        WorkQueue[] qs; WorkQueue q; int n;
        int r = ThreadLocalRandom.getProbe();
        return (((qs = queues) != null && (n = qs.length) > 0 &&
                 (q = qs[r & EXTERNAL_ID_MASK & (n - 1)]) != null && r != 0 &&
                 q.tryLockPhase()) ? q : submissionQueue(r, rejectOnShutdown));
    }

    /**
     * Returns queue for an external thread, if one exists that has
     * possibly ever submitted to the given pool (nonzero probe), or
@ -3310,11 +3289,14 @@ public class ForkJoinPool extends AbstractExecutorService
     * @since 19
     */
    public int setParallelism(int size) {
        int prevSize;
        if (size < 1 || size > MAX_CAP)
            throw new IllegalArgumentException();
        if ((config & PRESET_SIZE) != 0)
            throw new UnsupportedOperationException("Cannot override System property");
        return getAndSetParallelism(size);
        if ((prevSize = getAndSetParallelism(size)) < size)
            signalWork(null, 0); // trigger worker activation
        return prevSize;
    }

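With the added signalWork call, raising the target now activates workers immediately instead of waiting for the next submission. Usage (public API since 19):

    import java.util.concurrent.ForkJoinPool;

    public class ResizeDemo {
        public static void main(String[] args) {
            ForkJoinPool pool = new ForkJoinPool(2);
            int prev = pool.setParallelism(8);    // returns 2; growth triggers a signal
            System.out.println(prev + " -> " + pool.getParallelism());
            pool.shutdown();
        }
    }
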
    /**

@ -588,13 +588,15 @@ public class LinkedTransferQueue<E> extends AbstractQueue<E>
                do {
                    m = p.item;
                    q = p.next;
                    if (p.isData != haveData && haveData != (m != null) &&
                        p.cmpExItem(m, e) == m) {
                        Thread w = p.waiter; // matched complementary node
                        if (p != h && h == cmpExHead(h, (q == null) ? p : q))
                            h.next = h; // advance head; self-link old
                        LockSupport.unpark(w);
                        return m;
                    if (p.isData != haveData && haveData != (m != null)) {
                        if (p.cmpExItem(m, e) == m) {
                            Thread w = p.waiter; // matched complementary node
                            if (p != h && h == cmpExHead(h, (q == null) ? p : q))
                                h.next = h; // advance head; self-link old
                            LockSupport.unpark(w);
                            return m;
                        }
                        continue restart;
                    } else if (q == null) {
                        if (ns == 0L) // try to append unless immediate
                            break restart;

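The restructured branch above keeps the complementary-node match (the cmpExItem CAS) on the path that serves transfer(): a producer blocks until some consumer receives the element. Minimal usage of the public API:

    import java.util.concurrent.LinkedTransferQueue;

    public class TransferDemo {
        public static void main(String[] args) throws InterruptedException {
            LinkedTransferQueue<String> q = new LinkedTransferQueue<>();
            Thread consumer = new Thread(() -> {
                try {
                    System.out.println("got " + q.take());
                } catch (InterruptedException ignored) {}
            });
            consumer.start();
            q.transfer("hello");      // returns only after the take() matches
            consumer.join();
        }
    }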