Merge
3
.hgtags
@ -349,3 +349,6 @@ f9bcdce2df26678c3fe468130b535c0342c69b89 jdk-9+99
|
||||
086c682bd8c5f195c324f61e2c61fbcd0226d63b jdk-9+104
|
||||
db483b34fa7148d257a429acddbde9c13687dcae jdk-9+105
|
||||
6c644cca3f3fc2763e2ff7d669849a75d34543ba jdk-9+106
|
||||
1c076468bf7dad5b8f2ee5dcf66e2279caa3e208 jdk-9+107
|
||||
257b579d813201682931d6b42f0445ffe5b4210d jdk-9+108
|
||||
c870cb782aca71093d2584376f27f0cfbfec0e3a jdk-9+109
|
||||
|
||||
@ -349,3 +349,6 @@ c4d72a1620835b5d657b7b6792c2879367d0154f jdk-9+101
|
||||
9a38f8b4ba220708db198d08d82fd2144a64777d jdk-9+104
|
||||
be58b02c11f90b88c67e4d0e2cb5e4cf2d9b3c57 jdk-9+105
|
||||
54575d8783b3a39a2d710c28cda675d44261f9d9 jdk-9+106
|
||||
4d65eba233a8730f913734a6804910b842d2cb54 jdk-9+107
|
||||
c7be2a78c31b3b6132f2f5e9e4b3d3bb1c20245c jdk-9+108
|
||||
1787bdaabb2b6f4193406e25a50cb0419ea8e8f3 jdk-9+109
|
||||
|
||||
@ -123,12 +123,16 @@ AC_DEFUN_ONCE([FLAGS_SETUP_INIT_FLAGS],
|
||||
[
|
||||
# COMPILER_TARGET_BITS_FLAG : option for selecting 32- or 64-bit output
|
||||
# COMPILER_COMMAND_FILE_FLAG : option for passing a command file to the compiler
|
||||
# COMPILER_BINDCMD_FILE_FLAG : option for specifying a file which saves the binder
|
||||
# commands produced by the link step (currently AIX only)
|
||||
if test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
COMPILER_TARGET_BITS_FLAG="-q"
|
||||
COMPILER_COMMAND_FILE_FLAG="-f"
|
||||
COMPILER_BINDCMD_FILE_FLAG="-bloadmap:"
|
||||
else
|
||||
COMPILER_TARGET_BITS_FLAG="-m"
|
||||
COMPILER_COMMAND_FILE_FLAG="@"
|
||||
COMPILER_BINDCMD_FILE_FLAG=""
|
||||
|
||||
# The solstudio linker does not support @-files.
|
||||
if test "x$TOOLCHAIN_TYPE" = xsolstudio; then
|
||||
@ -152,6 +156,7 @@ AC_DEFUN_ONCE([FLAGS_SETUP_INIT_FLAGS],
|
||||
fi
|
||||
AC_SUBST(COMPILER_TARGET_BITS_FLAG)
|
||||
AC_SUBST(COMPILER_COMMAND_FILE_FLAG)
|
||||
AC_SUBST(COMPILER_BINDCMD_FILE_FLAG)
|
||||
|
||||
# FIXME: figure out if we should select AR flags depending on OS or toolchain.
|
||||
if test "x$OPENJDK_TARGET_OS" = xmacosx; then
|
||||
@ -294,10 +299,23 @@ AC_DEFUN_ONCE([FLAGS_SETUP_COMPILER_FLAGS_FOR_LIBS],
|
||||
SET_SHARED_LIBRARY_NAME='-h [$]1'
|
||||
SET_SHARED_LIBRARY_MAPFILE='-M[$]1'
|
||||
elif test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
PICFLAG="-qpic=large"
|
||||
# '-qpic' defaults to 'qpic=small'. This means that the compiler generates only
|
||||
# one instruction for accessing the TOC. If the TOC grows larger than 64K, the linker
|
||||
# will have to patch this single instruction with a call to some out-of-order code which
|
||||
# does the load from the TOC. This is of course slow. But in that case we also would have
|
||||
# to use '-bbigtoc' for linking anyway so we could also change the PICFLAG to 'qpic=large'.
|
||||
# With 'qpic=large' the compiler will by default generate a two-instruction sequence which
|
||||
# can be patched directly by the linker and does not require a jump to out-of-order code.
|
||||
# Another alternative instead of using 'qpic=large -bbigtoc' may be to use '-qminimaltoc'
|
||||
# instead. This creates a distinct TOC for every compilation unit (and thus requires two
|
||||
# loads for accessing a global variable). But there are rumors that this may be seen as a
|
||||
# 'performance feature' because of improved code locality of the symbols used in a
|
||||
# compilation unit.
|
||||
PICFLAG="-qpic"
|
||||
JVM_CFLAGS="$JVM_CFLAGS $PICFLAG"
|
||||
C_FLAG_REORDER=''
|
||||
CXX_FLAG_REORDER=''
|
||||
SHARED_LIBRARY_FLAGS="-qmkshrobj"
|
||||
SHARED_LIBRARY_FLAGS="-qmkshrobj -bM:SRE -bnoentry"
|
||||
SET_EXECUTABLE_ORIGIN=""
|
||||
SET_SHARED_LIBRARY_ORIGIN=''
|
||||
SET_SHARED_LIBRARY_NAME=''
|
||||
@ -835,7 +853,7 @@ AC_DEFUN_ONCE([FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK],
|
||||
LDFLAGS_CXX_SOLSTUDIO="-norunpath"
|
||||
LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK $LDFLAGS_CXX_SOLSTUDIO -xnolib"
|
||||
elif test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
LDFLAGS_XLC="-brtl -bnolibpath -bexpall -bernotok"
|
||||
LDFLAGS_XLC="-b64 -brtl -bnolibpath -bexpall -bernotok"
|
||||
LDFLAGS_JDK="${LDFLAGS_JDK} $LDFLAGS_XLC"
|
||||
fi
|
||||
|
||||
@ -891,6 +909,7 @@ AC_DEFUN_ONCE([FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK],
|
||||
AC_SUBST(JDKLIB_LIBS)
|
||||
AC_SUBST(JDKEXE_LIBS)
|
||||
AC_SUBST(LDFLAGS_CXX_JDK)
|
||||
AC_SUBST(LDFLAGS_HASH_STYLE)
|
||||
|
||||
LDFLAGS_TESTLIB="$LDFLAGS_JDKLIB"
|
||||
LDFLAGS_TESTEXE="$LDFLAGS_JDKEXE"
|
||||
|
||||
@ -701,6 +701,7 @@ COMPILER_SUPPORTS_TARGET_BITS_FLAG
|
||||
ZERO_ARCHFLAG
|
||||
LDFLAGS_TESTEXE
|
||||
LDFLAGS_TESTLIB
|
||||
LDFLAGS_HASH_STYLE
|
||||
LDFLAGS_CXX_JDK
|
||||
JDKEXE_LIBS
|
||||
JDKLIB_LIBS
|
||||
@ -743,6 +744,7 @@ EXE_OUT_OPTION
|
||||
CC_OUT_OPTION
|
||||
STRIPFLAGS
|
||||
ARFLAGS
|
||||
COMPILER_BINDCMD_FILE_FLAG
|
||||
COMPILER_COMMAND_FILE_FLAG
|
||||
COMPILER_TARGET_BITS_FLAG
|
||||
JT_HOME
|
||||
@ -4003,7 +4005,7 @@ apt_help() {
|
||||
devkit)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install build-essential" ;;
|
||||
openjdk)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install openjdk-7-jdk" ;;
|
||||
PKGHANDLER_COMMAND="sudo apt-get install openjdk-8-jdk" ;;
|
||||
alsa)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install libasound2-dev" ;;
|
||||
cups)
|
||||
@ -4024,7 +4026,7 @@ yum_help() {
|
||||
devkit)
|
||||
PKGHANDLER_COMMAND="sudo yum groupinstall \"Development Tools\"" ;;
|
||||
openjdk)
|
||||
PKGHANDLER_COMMAND="sudo yum install java-1.7.0-openjdk" ;;
|
||||
PKGHANDLER_COMMAND="sudo yum install java-1.8.0-openjdk-devel" ;;
|
||||
alsa)
|
||||
PKGHANDLER_COMMAND="sudo yum install alsa-lib-devel" ;;
|
||||
cups)
|
||||
@ -4230,7 +4232,7 @@ pkgadd_help() {
|
||||
|
||||
|
||||
#
|
||||
# Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -4860,7 +4862,7 @@ VS_SDK_PLATFORM_NAME_2013=
|
||||
#CUSTOM_AUTOCONF_INCLUDE
|
||||
|
||||
# Do not change or remove the following line, it is needed for consistency checks:
|
||||
DATE_WHEN_GENERATED=1455271513
|
||||
DATE_WHEN_GENERATED=1457684806
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
@ -15116,6 +15118,10 @@ $as_echo "$COMPILE_TYPE" >&6; }
|
||||
REQUIRED_OS_NAME=Darwin
|
||||
REQUIRED_OS_VERSION=11.2
|
||||
fi
|
||||
if test "x$OPENJDK_TARGET_OS" = "xaix"; then
|
||||
REQUIRED_OS_NAME=AIX
|
||||
REQUIRED_OS_VERSION=7.1
|
||||
fi
|
||||
|
||||
|
||||
|
||||
@ -45391,12 +45397,16 @@ $as_echo "$tool_specified" >&6; }
|
||||
|
||||
# COMPILER_TARGET_BITS_FLAG : option for selecting 32- or 64-bit output
|
||||
# COMPILER_COMMAND_FILE_FLAG : option for passing a command file to the compiler
|
||||
# COMPILER_BINDCMD_FILE_FLAG : option for specifying a file which saves the binder
|
||||
# commands produced by the link step (currently AIX only)
|
||||
if test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
COMPILER_TARGET_BITS_FLAG="-q"
|
||||
COMPILER_COMMAND_FILE_FLAG="-f"
|
||||
COMPILER_BINDCMD_FILE_FLAG="-bloadmap:"
|
||||
else
|
||||
COMPILER_TARGET_BITS_FLAG="-m"
|
||||
COMPILER_COMMAND_FILE_FLAG="@"
|
||||
COMPILER_BINDCMD_FILE_FLAG=""
|
||||
|
||||
# The solstudio linker does not support @-files.
|
||||
if test "x$TOOLCHAIN_TYPE" = xsolstudio; then
|
||||
@ -45424,6 +45434,7 @@ $as_echo "no" >&6; }
|
||||
|
||||
|
||||
|
||||
|
||||
# FIXME: figure out if we should select AR flags depending on OS or toolchain.
|
||||
if test "x$OPENJDK_TARGET_OS" = xmacosx; then
|
||||
ARFLAGS="-r"
|
||||
@ -46198,10 +46209,23 @@ $as_echo "$ac_cv_c_bigendian" >&6; }
|
||||
SET_SHARED_LIBRARY_NAME='-h $1'
|
||||
SET_SHARED_LIBRARY_MAPFILE='-M$1'
|
||||
elif test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
PICFLAG="-qpic=large"
|
||||
# '-qpic' defaults to 'qpic=small'. This means that the compiler generates only
|
||||
# one instruction for accessing the TOC. If the TOC grows larger than 64K, the linker
|
||||
# will have to patch this single instruction with a call to some out-of-order code which
|
||||
# does the load from the TOC. This is of course slow. But in that case we also would have
|
||||
# to use '-bbigtoc' for linking anyway so we could also change the PICFLAG to 'qpic=large'.
|
||||
# With 'qpic=large' the compiler will by default generate a two-instruction sequence which
|
||||
# can be patched directly by the linker and does not require a jump to out-of-order code.
|
||||
# Another alternative instead of using 'qpic=large -bbigtoc' may be to use '-qminimaltoc'
|
||||
# instead. This creates a distinct TOC for every compilation unit (and thus requires two
|
||||
# loads for accessing a global variable). But there are rumors that this may be seen as a
|
||||
# 'performance feature' because of improved code locality of the symbols used in a
|
||||
# compilation unit.
|
||||
PICFLAG="-qpic"
|
||||
JVM_CFLAGS="$JVM_CFLAGS $PICFLAG"
|
||||
C_FLAG_REORDER=''
|
||||
CXX_FLAG_REORDER=''
|
||||
SHARED_LIBRARY_FLAGS="-qmkshrobj"
|
||||
SHARED_LIBRARY_FLAGS="-qmkshrobj -bM:SRE -bnoentry"
|
||||
SET_EXECUTABLE_ORIGIN=""
|
||||
SET_SHARED_LIBRARY_ORIGIN=''
|
||||
SET_SHARED_LIBRARY_NAME=''
|
||||
@ -46824,7 +46848,7 @@ $as_echo "$supports" >&6; }
|
||||
LDFLAGS_CXX_SOLSTUDIO="-norunpath"
|
||||
LDFLAGS_CXX_JDK="$LDFLAGS_CXX_JDK $LDFLAGS_CXX_SOLSTUDIO -xnolib"
|
||||
elif test "x$TOOLCHAIN_TYPE" = xxlc; then
|
||||
LDFLAGS_XLC="-brtl -bnolibpath -bexpall -bernotok"
|
||||
LDFLAGS_XLC="-b64 -brtl -bnolibpath -bexpall -bernotok"
|
||||
LDFLAGS_JDK="${LDFLAGS_JDK} $LDFLAGS_XLC"
|
||||
fi
|
||||
|
||||
@ -46881,6 +46905,7 @@ $as_echo "$supports" >&6; }
|
||||
|
||||
|
||||
|
||||
|
||||
LDFLAGS_TESTLIB="$LDFLAGS_JDKLIB"
|
||||
LDFLAGS_TESTEXE="$LDFLAGS_JDKEXE"
|
||||
|
||||
@ -58630,7 +58655,8 @@ fi
|
||||
|
||||
|
||||
# Setup libm (the maths library)
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
|
||||
if test "x$OPENJDK_TARGET_OS" != "xwindows"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
|
||||
$as_echo_n "checking for cos in -lm... " >&6; }
|
||||
if ${ac_cv_lib_m_cos+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
@ -58675,12 +58701,15 @@ _ACEOF
|
||||
|
||||
else
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: Maths library was not found" >&5
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: Maths library was not found" >&5
|
||||
$as_echo "$as_me: Maths library was not found" >&6;}
|
||||
|
||||
fi
|
||||
|
||||
LIBM=-lm
|
||||
LIBM="-lm"
|
||||
else
|
||||
LIBM=""
|
||||
fi
|
||||
|
||||
|
||||
# Setup libdl (for dynamic library loading)
|
||||
|
||||
@ -106,7 +106,7 @@ apt_help() {
|
||||
devkit)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install build-essential" ;;
|
||||
openjdk)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install openjdk-7-jdk" ;;
|
||||
PKGHANDLER_COMMAND="sudo apt-get install openjdk-8-jdk" ;;
|
||||
alsa)
|
||||
PKGHANDLER_COMMAND="sudo apt-get install libasound2-dev" ;;
|
||||
cups)
|
||||
@ -127,7 +127,7 @@ yum_help() {
|
||||
devkit)
|
||||
PKGHANDLER_COMMAND="sudo yum groupinstall \"Development Tools\"" ;;
|
||||
openjdk)
|
||||
PKGHANDLER_COMMAND="sudo yum install java-1.7.0-openjdk" ;;
|
||||
PKGHANDLER_COMMAND="sudo yum install java-1.8.0-openjdk-devel" ;;
|
||||
alsa)
|
||||
PKGHANDLER_COMMAND="sudo yum install alsa-lib-devel" ;;
|
||||
cups)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -160,10 +160,14 @@ AC_DEFUN_ONCE([LIB_SETUP_LLVM],
|
||||
AC_DEFUN_ONCE([LIB_SETUP_MISC_LIBS],
|
||||
[
|
||||
# Setup libm (the maths library)
|
||||
AC_CHECK_LIB(m, cos, [], [
|
||||
AC_MSG_NOTICE([Maths library was not found])
|
||||
])
|
||||
LIBM=-lm
|
||||
if test "x$OPENJDK_TARGET_OS" != "xwindows"; then
|
||||
AC_CHECK_LIB(m, cos, [], [
|
||||
AC_MSG_NOTICE([Maths library was not found])
|
||||
])
|
||||
LIBM="-lm"
|
||||
else
|
||||
LIBM=""
|
||||
fi
|
||||
AC_SUBST(LIBM)
|
||||
|
||||
# Setup libdl (for dynamic library loading)
|
||||
|
||||
@ -406,6 +406,10 @@ AC_DEFUN([PLATFORM_SET_RELEASE_FILE_OS_VALUES],
|
||||
REQUIRED_OS_NAME=Darwin
|
||||
REQUIRED_OS_VERSION=11.2
|
||||
fi
|
||||
if test "x$OPENJDK_TARGET_OS" = "xaix"; then
|
||||
REQUIRED_OS_NAME=AIX
|
||||
REQUIRED_OS_VERSION=7.1
|
||||
fi
|
||||
|
||||
AC_SUBST(REQUIRED_OS_NAME)
|
||||
AC_SUBST(REQUIRED_OS_VERSION)
|
||||
|
||||
@ -314,6 +314,10 @@ COMPILER_SUPPORTS_TARGET_BITS_FLAG=@COMPILER_SUPPORTS_TARGET_BITS_FLAG@
|
||||
# Option used to pass a command file to the compiler
|
||||
COMPILER_COMMAND_FILE_FLAG:=@COMPILER_COMMAND_FILE_FLAG@
|
||||
|
||||
# Option for specifying a file which saves the binder commands
|
||||
# produced by the link step (for debugging, currently AIX only)
|
||||
COMPILER_BINDCMD_FILE_FLAG:=@COMPILER_BINDCMD_FILE_FLAG@
|
||||
|
||||
CC_OUT_OPTION:=@CC_OUT_OPTION@
|
||||
EXE_OUT_OPTION:=@EXE_OUT_OPTION@
|
||||
LD_OUT_OPTION:=@LD_OUT_OPTION@
|
||||
@ -351,6 +355,8 @@ CXXFLAGS_JDKLIB:=@CXXFLAGS_JDKLIB@
|
||||
CFLAGS_JDKEXE:=@CFLAGS_JDKEXE@
|
||||
CXXFLAGS_JDKEXE:=@CXXFLAGS_JDKEXE@
|
||||
|
||||
LDFLAGS_HASH_STYLE := @LDFLAGS_HASH_STYLE@
|
||||
|
||||
CXX:=@FIXPATH@ @CCACHE@ @ICECC@ @CXX@
|
||||
|
||||
CPP:=@FIXPATH@ @CPP@
|
||||
|
||||
@ -185,7 +185,6 @@ if [ "$OPENJDK_TARGET_OS" = "solaris" ] && [ "$OPENJDK_TARGET_CPU" = "x86_64" ];
|
||||
./lib/amd64/libjava.so
|
||||
./lib/amd64/libjawt.so
|
||||
./lib/amd64/libjdwp.so
|
||||
./lib/amd64/libjfr.so
|
||||
./lib/amd64/libjpeg.so
|
||||
./lib/amd64/libjsdt.so
|
||||
./lib/amd64/libjsound.so
|
||||
@ -321,7 +320,6 @@ if [ "$OPENJDK_TARGET_OS" = "solaris" ] && [ "$OPENJDK_TARGET_CPU" = "sparcv9" ]
|
||||
./lib/sparcv9/libjava.so
|
||||
./lib/sparcv9/libjawt.so
|
||||
./lib/sparcv9/libjdwp.so
|
||||
./lib/sparcv9/libjfr.so
|
||||
./lib/sparcv9/libjpeg.so
|
||||
./lib/sparcv9/libjsdt.so
|
||||
./lib/sparcv9/libjsound.so
|
||||
|
||||
@ -1293,12 +1293,8 @@ jdk/src/jdk.crypto.pkcs11/windows/native/libj2pkcs11/j2secmod_md.c : jdk/src/win
|
||||
jdk/src/jdk.crypto.pkcs11/windows/native/libj2pkcs11/j2secmod_md.h : jdk/src/windows/native/sun/security/pkcs11/j2secmod_md.h
|
||||
jdk/src/jdk.crypto.pkcs11/windows/native/libj2pkcs11/p11_md.c : jdk/src/windows/native/sun/security/pkcs11/wrapper/p11_md.c
|
||||
jdk/src/jdk.crypto.pkcs11/windows/native/libj2pkcs11/p11_md.h : jdk/src/windows/native/sun/security/pkcs11/wrapper/p11_md.h
|
||||
jdk/src/jdk.deploy.osx/macosx/classes/com/apple/concurrent/package.html : jdk/src/macosx/classes/com/apple/concurrent/package.html
|
||||
jdk/src/jdk.deploy.osx/macosx/classes/com/apple/concurrent : jdk/src/macosx/classes/com/apple/concurrent
|
||||
jdk/src/jdk.deploy.osx/macosx/native/libosx/CFileManager.m : jdk/src/macosx/native/com/apple/eio/CFileManager.m
|
||||
jdk/src/jdk.deploy.osx/macosx/native/libosx/Dispatch.m : jdk/src/macosx/native/com/apple/concurrent/Dispatch.m
|
||||
jdk/src/jdk.deploy.osx/macosx/native/libosx/JavaAppLauncher.m : jdk/src/macosx/native/apple/launcher/JavaAppLauncher.m
|
||||
jdk/src/jdk.deploy.osx/macosx/native/libosx/KeystoreImpl.m : jdk/src/macosx/native/apple/security/KeystoreImpl.m
|
||||
jdk/src/java.desktop/macosx/native/libosx/CFileManager.m : jdk/src/macosx/native/com/apple/eio/CFileManager.m
|
||||
jdk/src/java.base/macosx/native/libosxsecurity/KeystoreImpl.m : jdk/src/macosx/native/apple/security/KeystoreImpl.m
|
||||
jdk/src/jdk.hprof.agent/share/classes/com/sun/demo/jvmti/hprof : jdk/src/share/classes/com/sun/demo/jvmti/hprof
|
||||
jdk/src/jdk.httpserver/share/classes/com/sun/net/httpserver : jdk/src/share/classes/com/sun/net/httpserver
|
||||
jdk/src/jdk.httpserver/share/classes/sun/net/httpserver : jdk/src/share/classes/sun/net/httpserver
|
||||
|
||||
@ -311,6 +311,16 @@ var getJibProfilesProfiles = function (input, common) {
|
||||
labels: [ "open" ]
|
||||
},
|
||||
|
||||
"linux-x86-open": {
|
||||
target_os: mainProfiles["linux-x86"].target_os,
|
||||
target_cpu: mainProfiles["linux-x86"].target_cpu,
|
||||
dependencies: mainProfiles["linux-x86"].dependencies,
|
||||
configure_args: concat(mainProfiles["linux-x86"].configure_args,
|
||||
"--enable-openjdk-only"),
|
||||
make_args: mainProfiles["linux-x86"].make_args,
|
||||
labels: [ "open" ]
|
||||
},
|
||||
|
||||
"solaris-x64-open": {
|
||||
target_os: mainProfiles["solaris-x64"].target_os,
|
||||
target_cpu: mainProfiles["solaris-x64"].target_cpu,
|
||||
@ -319,6 +329,16 @@ var getJibProfilesProfiles = function (input, common) {
|
||||
"--enable-openjdk-only"),
|
||||
make_args: mainProfiles["solaris-x64"].make_args,
|
||||
labels: [ "open" ]
|
||||
},
|
||||
|
||||
"windows-x86-open": {
|
||||
target_os: mainProfiles["windows-x86"].target_os,
|
||||
target_cpu: mainProfiles["windows-x86"].target_cpu,
|
||||
dependencies: mainProfiles["windows-x86"].dependencies,
|
||||
configure_args: concat(mainProfiles["windows-x86"].configure_args,
|
||||
"--enable-openjdk-only"),
|
||||
make_args: mainProfiles["windows-x86"].make_args,
|
||||
labels: [ "open" ]
|
||||
}
|
||||
};
|
||||
profiles = concatObjects(profiles, jprtOpenProfiles);
|
||||
|
||||
@ -349,3 +349,6 @@ ea285530245cf4e0edf0479121a41347d3030eba jdk-9+98
|
||||
e385e95e6101711d5c63e7b1a827e99b6ec7a1cc jdk-9+104
|
||||
64006ae915b3aa85ac7e6fac679024d2da7fe526 jdk-9+105
|
||||
8ec4f97943fe56f93e4621f622b56b7144c0181a jdk-9+106
|
||||
49202432b69445164a42be7cbdf74ed5fce98157 jdk-9+107
|
||||
84f2862a25eb3232ff36c376b4e2bf2a83dfced3 jdk-9+108
|
||||
b75afa17aefe480c23c616a6a2497063312f7189 jdk-9+109
|
||||
|
||||
@ -509,3 +509,6 @@ c5f55130b1b69510d9a6f4a3105b58e21cd7ffe1 jdk-9+103
|
||||
534c50395957c6025fb6627e93b35756f8d48a08 jdk-9+104
|
||||
266fa9bb5297bf02cb2a7b038b10a109817d2b48 jdk-9+105
|
||||
7232de4c17c37f60aecec4f3191090bd3d41d334 jdk-9+106
|
||||
c5146d4da417f76edfc43097d2e2ced042a65b4e jdk-9+107
|
||||
934f6793f5f7dca44f69b4559d525fa64b31840d jdk-9+108
|
||||
7e7e50ac4faf19899fc811569e32cfa478759ebb jdk-9+109
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -57,11 +57,6 @@ ifeq ($(HAS_ALT_SRC), true)
|
||||
TraceGeneratedNames += \
|
||||
traceRequestables.hpp \
|
||||
traceEventControl.hpp
|
||||
|
||||
ifneq ($(INCLUDE_TRACE), false)
|
||||
TraceGeneratedNames += traceProducer.cpp
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%)
|
||||
@ -100,9 +95,6 @@ else
|
||||
$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -37,3 +37,11 @@ ifndef USE_SUNCC
|
||||
endif
|
||||
|
||||
OPT_CFLAGS/compactingPermGenGen.o = -O1
|
||||
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/sharedRuntimeTrig.o += -g
|
||||
OPT_CFLAGS/sharedRuntimeTrans.o += -g
|
||||
OPT_CFLAGS/compactingPermGenGen.o += -g
|
||||
endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -24,8 +24,4 @@
|
||||
|
||||
Obj_Files += bsd_arm.o
|
||||
|
||||
ifneq ($(EXT_LIBS_PATH),)
|
||||
LIBS += $(EXT_LIBS_PATH)/sflt_glibc.a
|
||||
endif
|
||||
|
||||
CFLAGS += -DVM_LITTLE_ENDIAN
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -330,6 +330,13 @@ ifeq ($(USE_CLANG), true)
|
||||
), 1)
|
||||
OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
|
||||
OPT_CFLAGS/unsafe.o += -O1
|
||||
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/loopTransform.o += -g
|
||||
OPT_CFLAGS/unsafe.o += -g
|
||||
endif
|
||||
else
|
||||
$(error "Update compiler workarounds for Clang $(CC_VER_MAJOR).$(CC_VER_MINOR)")
|
||||
endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -57,11 +57,6 @@ ifeq ($(HAS_ALT_SRC), true)
|
||||
TraceGeneratedNames += \
|
||||
traceRequestables.hpp \
|
||||
traceEventControl.hpp
|
||||
|
||||
ifneq ($(INCLUDE_TRACE), false)
|
||||
TraceGeneratedNames += traceProducer.cpp
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
@ -101,9 +96,6 @@ else
|
||||
$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,3 +32,11 @@ CFLAGS += -DVM_LITTLE_ENDIAN
|
||||
CFLAGS += -D_LP64=1
|
||||
|
||||
OPT_CFLAGS/compactingPermGenGen.o = -O1
|
||||
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/sharedRuntimeTrig.o += -g
|
||||
OPT_CFLAGS/sharedRuntimeTrans.o += -g
|
||||
OPT_CFLAGS/compactingPermGenGen.o += -g
|
||||
endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -265,6 +265,11 @@ ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \&
|
||||
# GCC >= 4.3
|
||||
# Gcc 4.1.2 does not support this flag, nor does it have problems compiling the file.
|
||||
OPT_CFLAGS/vmStructs.o += -fno-var-tracking-assignments
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/vmStructs.o += -g
|
||||
endif
|
||||
endif
|
||||
|
||||
# The gcc compiler segv's on ia64 when compiling bytecodeInterpreter.cpp
|
||||
@ -277,6 +282,11 @@ endif
|
||||
ifeq ($(USE_CLANG), true)
|
||||
ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 2), 1)
|
||||
OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/loopTransform.o += -g
|
||||
endif
|
||||
endif
|
||||
else
|
||||
# Do not allow GCC 4.1.1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,3 +32,11 @@ OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
|
||||
CFLAGS += -DVM_LITTLE_ENDIAN
|
||||
|
||||
OPT_CFLAGS/compactingPermGenGen.o = -O1
|
||||
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/sharedRuntimeTrig.o += -g
|
||||
OPT_CFLAGS/sharedRuntimeTrans.o += -g
|
||||
OPT_CFLAGS/compactingPermGenGen.o += -g
|
||||
endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -57,11 +57,6 @@ ifeq ($(HAS_ALT_SRC), true)
|
||||
TraceGeneratedNames += \
|
||||
traceRequestables.hpp \
|
||||
traceEventControl.hpp
|
||||
|
||||
ifneq ($(INCLUDE_TRACE), false)
|
||||
TraceGeneratedNames += traceProducer.cpp
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%)
|
||||
@ -100,9 +95,6 @@ else
|
||||
$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright 2007, 2008 Red Hat, Inc.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
@ -29,12 +29,6 @@
|
||||
ifeq ($(USE_CLANG), true)
|
||||
WARNING_FLAGS += -Wno-undef
|
||||
endif
|
||||
# Suppress some warning flags that are normally turned on for hotspot,
|
||||
# because some of the zero code has not been updated accordingly.
|
||||
WARNING_FLAGS += -Wno-return-type \
|
||||
-Wno-format-nonliteral -Wno-format-security \
|
||||
-Wno-maybe-uninitialized
|
||||
|
||||
|
||||
# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
|
||||
OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2004, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,6 +34,14 @@ ifeq ("${Platform_compiler}", "sparcWorks")
|
||||
OPT_CFLAGS/generateOptoStub.o = -xO2
|
||||
# Temporary util SS12u1 C++ compiler is fixed
|
||||
OPT_CFLAGS/c1_LinearScan.o = -xO2
|
||||
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/generateOptoStub.o += -g0 -xs
|
||||
OPT_CFLAGS/c1_LinearScan.o += -g0 -xs
|
||||
endif
|
||||
|
||||
else
|
||||
|
||||
ifeq ("${Platform_compiler}", "gcc")
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -35,11 +35,21 @@ OPT_CFLAGS/BYFILE = $(OPT_CFLAGS/$@)$(OPT_CFLAGS/DEFAULT$(OPT_CFLAGS/$@))
|
||||
# for this method for now. (fix this when dtrace bug 6258412 is fixed)
|
||||
ifndef USE_GCC
|
||||
OPT_CFLAGS/ciEnv.o = $(OPT_CFLAGS) -xinline=no%__1cFciEnvbFpost_compiled_method_load_event6MpnHnmethod__v_
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/ciEnv.o += -g0 -xs
|
||||
endif
|
||||
endif
|
||||
|
||||
# Need extra inlining to get oop_ps_push_contents functions to perform well enough.
|
||||
ifndef USE_GCC
|
||||
OPT_CFLAGS/psPromotionManager.o = $(OPT_CFLAGS) -W2,-Ainline:inc=1000
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/psPromotionManager.o += -g0 -xs
|
||||
endif
|
||||
endif
|
||||
|
||||
# (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
|
||||
@ -55,6 +65,12 @@ endif # COMPILER_REV_NUMERIC == 510
|
||||
ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
|
||||
# dtrace cannot handle tail call optimization (6672627, 6693876)
|
||||
OPT_CFLAGS/jni.o = $(OPT_CFLAGS/DEFAULT) $(OPT_CCFLAGS/NO_TAIL_CALL_OPT)
|
||||
# The -g0 -xs flag is added to OPT_CFLAGS in sparcWorks.make, but lost in case of
|
||||
# per-file overrides of OPT_CFLAGS. Restore it here. This is mainly needed
|
||||
# to provide a good baseline to compare the new build against.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/jni.o += -g0 -xs
|
||||
endif
|
||||
endif # COMPILER_NUMERIC_REV >= 509
|
||||
|
||||
# Workaround SS11 bug 6345274 (all platforms) (Fixed in SS11 patch and SS12)
|
||||
|
||||
@ -158,9 +158,20 @@ OPT_CFLAGS/NO_TAIL_CALL_OPT = -Wu,-O~yz
|
||||
OPT_CCFLAGS/NO_TAIL_CALL_OPT = -Qoption ube -O~yz
|
||||
OPT_CFLAGS/stubGenerator_x86_32.o = $(OPT_CFLAGS) -xspace
|
||||
OPT_CFLAGS/stubGenerator_x86_64.o = $(OPT_CFLAGS) -xspace
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/stubGenerator_x86_32.o += -g0 -xs
|
||||
OPT_CFLAGS/stubGenerator_x86_64.o += -g0 -xs
|
||||
endif
|
||||
endif # Platform_arch == x86
|
||||
ifeq ("${Platform_arch}", "sparc")
|
||||
OPT_CFLAGS/stubGenerator_sparc.o = $(OPT_CFLAGS) -xspace
|
||||
# The debug flag is added to OPT_CFLAGS, but lost in case of per-file overrides
|
||||
# of OPT_CFLAGS. Restore it here.
|
||||
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
|
||||
OPT_CFLAGS/stubGenerator_sparc.o += -g0 -xs
|
||||
endif
|
||||
endif
|
||||
endif # COMPILER_REV_NUMERIC >= 509
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -56,8 +56,7 @@ TraceGeneratedNames = \
|
||||
ifeq ($(HAS_ALT_SRC), true)
|
||||
TraceGeneratedNames += \
|
||||
traceRequestables.hpp \
|
||||
traceEventControl.hpp \
|
||||
traceProducer.cpp
|
||||
traceEventControl.hpp
|
||||
endif
|
||||
|
||||
TraceGeneratedFiles = $(TraceGeneratedNames:%=$(TraceOutDir)/%)
|
||||
@ -96,9 +95,6 @@ else
|
||||
$(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
|
||||
$(GENERATE_CODE)
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -43,8 +43,7 @@ TraceGeneratedNames = \
|
||||
!if EXISTS($(TraceAltSrcDir))
|
||||
TraceGeneratedNames = $(TraceGeneratedNames) \
|
||||
traceRequestables.hpp \
|
||||
traceEventControl.hpp \
|
||||
traceProducer.cpp
|
||||
traceEventControl.hpp
|
||||
!endif
|
||||
|
||||
|
||||
@ -58,8 +57,7 @@ TraceGeneratedFiles = \
|
||||
!if EXISTS($(TraceAltSrcDir))
|
||||
TraceGeneratedFiles = $(TraceGeneratedFiles) \
|
||||
$(TraceOutDir)/traceRequestables.hpp \
|
||||
$(TraceOutDir)/traceEventControl.hpp \
|
||||
$(TraceOutDir)/traceProducer.cpp
|
||||
$(TraceOutDir)/traceEventControl.hpp
|
||||
!endif
|
||||
|
||||
XSLT = $(QUIETLY) $(REMOTE) $(RUN_JAVA) -classpath $(JvmtiOutDir) jvmtiGen
|
||||
@ -98,10 +96,6 @@ $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)
|
||||
@echo Generating AltSrc $@
|
||||
@$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceEventClasses.xsl -OUT $(TraceOutDir)/traceEventClasses.hpp
|
||||
|
||||
$(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS)
|
||||
@echo Generating AltSrc $@
|
||||
@$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceProducer.xsl -OUT $(TraceOutDir)/traceProducer.cpp
|
||||
|
||||
$(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS)
|
||||
@echo Generating AltSrc $@
|
||||
@$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceRequestables.xsl -OUT $(TraceOutDir)/traceRequestables.hpp
|
||||
|
||||
@ -3425,9 +3425,6 @@ const bool Matcher::misaligned_vectors_ok() {
|
||||
// false => size gets scaled to BytesPerLong, ok.
|
||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||
|
||||
// Threshold size for cleararray.
|
||||
const int Matcher::init_array_short_size = 18 * BytesPerLong;
|
||||
|
||||
// Use conditional move (CMOVL)
|
||||
const int Matcher::long_cmove_cost() {
|
||||
// long cmoves are no more expensive than int cmoves
|
||||
@ -4135,14 +4132,14 @@ encode %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
&Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
|
||||
Assembler::xword, /*acquire*/ false, /*release*/ true);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
&Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
|
||||
Assembler::word, /*acquire*/ false, /*release*/ true);
|
||||
%}
|
||||
|
||||
|
||||
@ -4154,14 +4151,14 @@ encode %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
&Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
|
||||
Assembler::xword, /*acquire*/ true, /*release*/ true);
|
||||
%}
|
||||
|
||||
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
|
||||
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
|
||||
&Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
|
||||
Assembler::word, /*acquire*/ true, /*release*/ true);
|
||||
%}
|
||||
|
||||
|
||||
@ -4679,8 +4676,14 @@ encode %{
|
||||
|
||||
// Compare object markOop with mark and if equal exchange scratch1
|
||||
// with object markOop.
|
||||
{
|
||||
if (UseLSE) {
|
||||
__ mov(tmp, disp_hdr);
|
||||
__ casal(Assembler::xword, tmp, box, oop);
|
||||
__ cmp(tmp, disp_hdr);
|
||||
__ br(Assembler::EQ, cont);
|
||||
} else {
|
||||
Label retry_load;
|
||||
__ prfm(Address(oop), PSTL1STRM);
|
||||
__ bind(retry_load);
|
||||
__ ldaxr(tmp, oop);
|
||||
__ cmp(tmp, disp_hdr);
|
||||
@ -4729,8 +4732,13 @@ encode %{
|
||||
__ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
|
||||
__ mov(disp_hdr, zr);
|
||||
|
||||
{
|
||||
if (UseLSE) {
|
||||
__ mov(rscratch1, disp_hdr);
|
||||
__ casal(Assembler::xword, rscratch1, rthread, tmp);
|
||||
__ cmp(rscratch1, disp_hdr);
|
||||
} else {
|
||||
Label retry_load, fail;
|
||||
__ prfm(Address(tmp), PSTL1STRM);
|
||||
__ bind(retry_load);
|
||||
__ ldaxr(rscratch1, tmp);
|
||||
__ cmp(disp_hdr, rscratch1);
|
||||
@ -4818,8 +4826,13 @@ encode %{
|
||||
// see the stack address of the basicLock in the markOop of the
|
||||
// object.
|
||||
|
||||
{
|
||||
if (UseLSE) {
|
||||
__ mov(tmp, box);
|
||||
__ casl(Assembler::xword, tmp, disp_hdr, oop);
|
||||
__ cmp(tmp, box);
|
||||
} else {
|
||||
Label retry_load;
|
||||
__ prfm(Address(oop), PSTL1STRM);
|
||||
__ bind(retry_load);
|
||||
__ ldxr(tmp, oop);
|
||||
__ cmp(box, tmp);
|
||||
@ -13281,7 +13294,7 @@ instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{
|
||||
__ fmovs($dst$$Register, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
ins_pipe(fp_f2i);
|
||||
|
||||
%}
|
||||
|
||||
@ -13299,7 +13312,7 @@ instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{
|
||||
__ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
ins_pipe(fp_i2f);
|
||||
|
||||
%}
|
||||
|
||||
@ -13317,7 +13330,7 @@ instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
|
||||
__ fmovd($dst$$Register, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
ins_pipe(fp_d2l);
|
||||
|
||||
%}
|
||||
|
||||
@ -13335,7 +13348,7 @@ instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{
|
||||
__ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
ins_pipe(fp_l2d);
|
||||
|
||||
%}
|
||||
|
||||
@ -14191,6 +14204,25 @@ instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg
|
||||
ins_pipe(pipe_cmp_branch);
|
||||
%}
|
||||
|
||||
instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
|
||||
match(If cmp (CmpN op1 op2));
|
||||
predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
|
||||
|| n->in(1)->as_Bool()->_test._test == BoolTest::eq);
|
||||
effect(USE labl);
|
||||
|
||||
ins_cost(BRANCH_COST);
|
||||
format %{ "cbw$cmp $op1, $labl" %}
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
|
||||
if (cond == Assembler::EQ)
|
||||
__ cbzw($op1$$Register, *L);
|
||||
else
|
||||
__ cbnzw($op1$$Register, *L);
|
||||
%}
|
||||
ins_pipe(pipe_cmp_branch);
|
||||
%}
|
||||
|
||||
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
|
||||
match(If cmp (CmpP (DecodeN oop) zero));
|
||||
predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
|
||||
@ -14783,19 +14815,19 @@ instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
%}
|
||||
|
||||
instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
||||
iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!CompactStrings);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
||||
|
||||
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp" %}
|
||||
format %{ "String Equals $str1,$str2,$cnt -> $result" %}
|
||||
ins_encode %{
|
||||
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
||||
__ asrw($cnt$$Register, $cnt$$Register, 1);
|
||||
__ string_equals($str1$$Register, $str2$$Register,
|
||||
$cnt$$Register, $result$$Register,
|
||||
$tmp$$Register);
|
||||
__ arrays_equals($str1$$Register, $str2$$Register,
|
||||
$result$$Register, $cnt$$Register,
|
||||
2, /*is_string*/true);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
@ -14809,9 +14841,10 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
|
||||
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
|
||||
ins_encode %{
|
||||
__ byte_arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$result$$Register, $tmp$$Register);
|
||||
%}
|
||||
__ arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$result$$Register, $tmp$$Register,
|
||||
1, /*is_string*/false);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
@ -14824,12 +14857,14 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
|
||||
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
|
||||
ins_encode %{
|
||||
__ char_arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$result$$Register, $tmp$$Register);
|
||||
__ arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$result$$Register, $tmp$$Register,
|
||||
2, /*is_string*/false);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
|
||||
@ -16480,7 +16515,7 @@ instruct vsll2I(vecD dst, vecD src, vecX shift) %{
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift64_imm);
|
||||
ins_pipe(vshift64);
|
||||
%}
|
||||
|
||||
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
|
||||
@ -16494,7 +16529,7 @@ instruct vsll4I(vecX dst, vecX src, vecX shift) %{
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift128_imm);
|
||||
ins_pipe(vshift128);
|
||||
%}
|
||||
|
||||
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
|
||||
@ -16507,7 +16542,7 @@ instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift64_imm);
|
||||
ins_pipe(vshift64);
|
||||
%}
|
||||
|
||||
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
|
||||
@ -16520,7 +16555,7 @@ instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
|
||||
as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(vshift128_imm);
|
||||
ins_pipe(vshift128);
|
||||
%}
|
||||
|
||||
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
|
||||
@ -16638,7 +16673,7 @@ instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
|
||||
as_FloatRegister($src$$reg),
|
||||
(int)$shift$$constant & 63);
|
||||
%}
|
||||
ins_pipe(vshift128);
|
||||
ins_pipe(vshift128_imm);
|
||||
%}
|
||||
|
||||
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
|
||||
|
||||
@ -972,7 +972,7 @@ public:
|
||||
|
||||
// System
|
||||
void system(int op0, int op1, int CRn, int CRm, int op2,
|
||||
Register rt = (Register)0b11111)
|
||||
Register rt = dummy_reg)
|
||||
{
|
||||
starti;
|
||||
f(0b11010101000, 31, 21);
|
||||
@ -1082,7 +1082,7 @@ public:
|
||||
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME() { \
|
||||
branch_reg((Register)0b11111, opc); \
|
||||
branch_reg(dummy_reg, opc); \
|
||||
}
|
||||
|
||||
INSN(eret, 0b0100);
|
||||
@ -1094,10 +1094,22 @@ public:
|
||||
enum operand_size { byte, halfword, word, xword };
|
||||
|
||||
void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
|
||||
Register Rn, enum operand_size sz, int op, int o0) {
|
||||
Register Rn, enum operand_size sz, int op, bool ordered) {
|
||||
starti;
|
||||
f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
|
||||
rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
|
||||
rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
|
||||
}
|
||||
|
||||
void load_exclusive(Register dst, Register addr,
|
||||
enum operand_size sz, bool ordered) {
|
||||
load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
|
||||
sz, 0b010, ordered);
|
||||
}
|
||||
|
||||
void store_exclusive(Register status, Register new_val, Register addr,
|
||||
enum operand_size sz, bool ordered) {
|
||||
load_store_exclusive(status, new_val, dummy_reg, addr,
|
||||
sz, 0b000, ordered);
|
||||
}
|
||||
|
||||
#define INSN4(NAME, sz, op, o0) /* Four registers */ \
|
||||
@ -1109,19 +1121,19 @@ public:
|
||||
#define INSN3(NAME, sz, op, o0) /* Three registers */ \
|
||||
void NAME(Register Rs, Register Rt, Register Rn) { \
|
||||
guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
|
||||
load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \
|
||||
load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
|
||||
}
|
||||
|
||||
#define INSN2(NAME, sz, op, o0) /* Two registers */ \
|
||||
void NAME(Register Rt, Register Rn) { \
|
||||
load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \
|
||||
load_store_exclusive(dummy_reg, Rt, dummy_reg, \
|
||||
Rn, sz, op, o0); \
|
||||
}
|
||||
|
||||
#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
|
||||
void NAME(Register Rt1, Register Rt2, Register Rn) { \
|
||||
guarantee(Rt1 != Rt2, "unpredictable instruction"); \
|
||||
load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \
|
||||
load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \
|
||||
}
|
||||
|
||||
// bytes
|
||||
@ -1169,6 +1181,46 @@ public:
|
||||
#undef INSN4
|
||||
#undef INSN_FOO
|
||||
|
||||
// 8.1 Compare and swap extensions
|
||||
void lse_cas(Register Rs, Register Rt, Register Rn,
|
||||
enum operand_size sz, bool a, bool r, bool not_pair) {
|
||||
starti;
|
||||
if (! not_pair) { // Pair
|
||||
assert(sz == word || sz == xword, "invalid size");
|
||||
/* The size bit is in bit 30, not 31 */
|
||||
sz = (operand_size)(sz == word ? 0b00:0b01);
|
||||
}
|
||||
f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21);
|
||||
rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0);
|
||||
}
|
||||
|
||||
// CAS
|
||||
#define INSN(NAME, a, r) \
|
||||
void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \
|
||||
assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
|
||||
lse_cas(Rs, Rt, Rn, sz, a, r, true); \
|
||||
}
|
||||
INSN(cas, false, false)
|
||||
INSN(casa, true, false)
|
||||
INSN(casl, false, true)
|
||||
INSN(casal, true, true)
|
||||
#undef INSN
|
||||
|
||||
// CASP
|
||||
#define INSN(NAME, a, r) \
|
||||
void NAME(operand_size sz, Register Rs, Register Rs1, \
|
||||
Register Rt, Register Rt1, Register Rn) { \
|
||||
assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \
|
||||
Rs->successor() == Rs1 && Rt->successor() == Rt1 && \
|
||||
Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \
|
||||
lse_cas(Rs, Rt, Rn, sz, a, r, false); \
|
||||
}
|
||||
INSN(casp, false, false)
|
||||
INSN(caspa, true, false)
|
||||
INSN(caspl, false, true)
|
||||
INSN(caspal, true, true)
|
||||
#undef INSN
|
||||
|
||||
// Load register (literal)
|
||||
#define INSN(NAME, opc, V) \
|
||||
void NAME(Register Rt, address dest) { \
|
||||
|
||||
@ -1556,38 +1556,54 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
|
||||
}
|
||||
|
||||
void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
|
||||
Label retry_load, nope;
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
__ bind(retry_load);
|
||||
__ ldaxrw(rscratch1, addr);
|
||||
__ cmpw(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
__ br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write rscratch1 will be zero
|
||||
__ stlxrw(rscratch1, newval, addr);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
__ cbnzw(rscratch1, retry_load);
|
||||
__ bind(nope);
|
||||
if (UseLSE) {
|
||||
__ mov(rscratch1, cmpval);
|
||||
__ casal(Assembler::word, rscratch1, newval, addr);
|
||||
__ cmpw(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
} else {
|
||||
Label retry_load, nope;
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
__ prfm(Address(addr), PSTL1STRM);
|
||||
__ bind(retry_load);
|
||||
__ ldaxrw(rscratch1, addr);
|
||||
__ cmpw(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
__ br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write rscratch1 will be zero
|
||||
__ stlxrw(rscratch1, newval, addr);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
__ cbnzw(rscratch1, retry_load);
|
||||
__ bind(nope);
|
||||
}
|
||||
__ membar(__ AnyAny);
|
||||
}
|
||||
|
||||
void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
|
||||
Label retry_load, nope;
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
__ bind(retry_load);
|
||||
__ ldaxr(rscratch1, addr);
|
||||
__ cmp(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
__ br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write rscratch1 will be zero
|
||||
__ stlxr(rscratch1, newval, addr);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
__ cbnz(rscratch1, retry_load);
|
||||
__ bind(nope);
|
||||
if (UseLSE) {
|
||||
__ mov(rscratch1, cmpval);
|
||||
__ casal(Assembler::xword, rscratch1, newval, addr);
|
||||
__ cmp(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
} else {
|
||||
Label retry_load, nope;
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
__ prfm(Address(addr), PSTL1STRM);
|
||||
__ bind(retry_load);
|
||||
__ ldaxr(rscratch1, addr);
|
||||
__ cmp(rscratch1, cmpval);
|
||||
__ cset(rscratch1, Assembler::NE);
|
||||
__ br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write rscratch1 will be zero
|
||||
__ stlxr(rscratch1, newval, addr);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
__ cbnz(rscratch1, retry_load);
|
||||
__ bind(nope);
|
||||
}
|
||||
__ membar(__ AnyAny);
|
||||
}
|
||||
|
||||
@ -3156,6 +3172,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
|
||||
}
|
||||
Label again;
|
||||
__ lea(tmp, addr);
|
||||
__ prfm(Address(tmp), PSTL1STRM);
|
||||
__ bind(again);
|
||||
(_masm->*lda)(dst, tmp);
|
||||
(_masm->*add)(rscratch1, dst, inc);
|
||||
@ -3175,6 +3192,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr
|
||||
assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
|
||||
Label again;
|
||||
__ lea(tmp, addr);
|
||||
__ prfm(Address(tmp), PSTL1STRM);
|
||||
__ bind(again);
|
||||
(_masm->*lda)(dst, tmp);
|
||||
(_masm->*stl)(rscratch2, obj, tmp);
|
||||
|
||||
@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, false);
|
||||
// avoid biased locking while we are bootstrapping the aarch64 build
|
||||
define_pd_global(bool, UseBiasedLocking, false);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
|
||||
|
||||
#if defined(COMPILER1) || defined(COMPILER2)
|
||||
define_pd_global(intx, InlineSmallCode, 1000);
|
||||
#endif
|
||||
@ -101,9 +103,13 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
\
|
||||
product(bool, UseCRC32, false, \
|
||||
"Use CRC32 instructions for CRC32 computation") \
|
||||
\
|
||||
product(bool, UseLSE, false, \
|
||||
"Use LSE instructions") \
|
||||
|
||||
// Don't attempt to use Neon on builtin sim until builtin sim supports it
|
||||
#define UseCRC32 false
|
||||
#define UseSIMDForMemoryOps false
|
||||
|
||||
#else
|
||||
#define UseBuiltinSim false
|
||||
@ -121,6 +127,10 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
"Use Neon for CRC32 computation") \
|
||||
product(bool, UseCRC32, false, \
|
||||
"Use CRC32 instructions for CRC32 computation") \
|
||||
product(bool, UseSIMDForMemoryOps, false, \
|
||||
"Use SIMD instructions in generated memory move code") \
|
||||
product(bool, UseLSE, false, \
|
||||
"Use LSE instructions") \
|
||||
product(bool, TraceTraps, false, "Trace all traps the signal handler")
|
||||
|
||||
#endif
|
||||
|
||||
@ -74,7 +74,7 @@ void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, T
|
||||
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
|
||||
address pc = _instructions->start() + pc_offset;
|
||||
NativeInstruction* inst = nativeInstruction_at(pc);
|
||||
if (inst->is_adr_aligned()) {
|
||||
if (inst->is_adr_aligned() || inst->is_ldr_literal()) {
|
||||
address dest = _constants->start() + data_offset;
|
||||
_instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS));
|
||||
TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
|
||||
|
||||
@ -1638,6 +1638,7 @@ Address MacroAssembler::form_address(Register Rd, Register base, long byte_offse
|
||||
|
||||
void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
|
||||
Label retry_load;
|
||||
prfm(Address(counter_addr), PSTL1STRM);
|
||||
bind(retry_load);
|
||||
// flush and load exclusive from the memory location
|
||||
ldxrw(tmp, counter_addr);
|
||||
@ -2070,25 +2071,32 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg
|
||||
// oldv holds comparison value
|
||||
// newv holds value to write in exchange
|
||||
// addr identifies memory word to compare against/update
|
||||
// tmp returns 0/1 for success/failure
|
||||
Label retry_load, nope;
|
||||
|
||||
bind(retry_load);
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
ldaxr(tmp, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write tmp will be zero
|
||||
stlxr(tmp, newv, addr);
|
||||
cbzw(tmp, succeed);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
b(retry_load);
|
||||
// if the memory word differs we return it in oldv and signal a fail
|
||||
bind(nope);
|
||||
membar(AnyAny);
|
||||
mov(oldv, tmp);
|
||||
if (UseLSE) {
|
||||
mov(tmp, oldv);
|
||||
casal(Assembler::xword, oldv, newv, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::EQ, succeed);
|
||||
membar(AnyAny);
|
||||
} else {
|
||||
Label retry_load, nope;
|
||||
prfm(Address(addr), PSTL1STRM);
|
||||
bind(retry_load);
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
ldaxr(tmp, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write tmp will be zero
|
||||
stlxr(tmp, newv, addr);
|
||||
cbzw(tmp, succeed);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
b(retry_load);
|
||||
// if the memory word differs we return it in oldv and signal a fail
|
||||
bind(nope);
|
||||
membar(AnyAny);
|
||||
mov(oldv, tmp);
|
||||
}
|
||||
if (fail)
|
||||
b(*fail);
|
||||
}
|
||||
@ -2099,28 +2107,64 @@ void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Regis
|
||||
// newv holds value to write in exchange
|
||||
// addr identifies memory word to compare against/update
|
||||
// tmp returns 0/1 for success/failure
|
||||
Label retry_load, nope;
|
||||
|
||||
bind(retry_load);
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
ldaxrw(tmp, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write tmp will be zero
|
||||
stlxrw(tmp, newv, addr);
|
||||
cbzw(tmp, succeed);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
b(retry_load);
|
||||
// if the memory word differs we return it in oldv and signal a fail
|
||||
bind(nope);
|
||||
membar(AnyAny);
|
||||
mov(oldv, tmp);
|
||||
if (UseLSE) {
|
||||
mov(tmp, oldv);
|
||||
casal(Assembler::word, oldv, newv, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::EQ, succeed);
|
||||
membar(AnyAny);
|
||||
} else {
|
||||
Label retry_load, nope;
|
||||
prfm(Address(addr), PSTL1STRM);
|
||||
bind(retry_load);
|
||||
// flush and load exclusive from the memory location
|
||||
// and fail if it is not what we expect
|
||||
ldaxrw(tmp, addr);
|
||||
cmp(tmp, oldv);
|
||||
br(Assembler::NE, nope);
|
||||
// if we store+flush with no intervening write tmp will be zero
|
||||
stlxrw(tmp, newv, addr);
|
||||
cbzw(tmp, succeed);
|
||||
// retry so we only ever return after a load fails to compare
|
||||
// ensures we don't return a stale value after a failed write.
|
||||
b(retry_load);
|
||||
// if the memory word differs we return it in oldv and signal a fail
|
||||
bind(nope);
|
||||
membar(AnyAny);
|
||||
mov(oldv, tmp);
|
||||
}
|
||||
if (fail)
|
||||
b(*fail);
|
||||
}
|
||||
|
||||
// A generic CAS; success or failure is in the EQ flag.
|
||||
void MacroAssembler::cmpxchg(Register addr, Register expected,
|
||||
Register new_val,
|
||||
enum operand_size size,
|
||||
bool acquire, bool release,
|
||||
Register tmp) {
|
||||
if (UseLSE) {
|
||||
mov(tmp, expected);
|
||||
lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
|
||||
cmp(tmp, expected);
|
||||
} else {
|
||||
BLOCK_COMMENT("cmpxchg {");
|
||||
Label retry_load, done;
|
||||
prfm(Address(addr), PSTL1STRM);
|
||||
bind(retry_load);
|
||||
load_exclusive(tmp, addr, size, acquire);
|
||||
if (size == xword)
|
||||
cmp(tmp, expected);
|
||||
else
|
||||
cmpw(tmp, expected);
|
||||
br(Assembler::NE, done);
|
||||
store_exclusive(tmp, new_val, addr, size, release);
|
||||
cbnzw(tmp, retry_load);
|
||||
bind(done);
|
||||
BLOCK_COMMENT("} cmpxchg");
|
||||
}
|
||||
}
|
||||
|
||||
static bool different(Register a, RegisterOrConstant b, Register c) {
|
||||
if (b.is_constant())
|
||||
return a != c;
|
||||
@ -2135,6 +2179,7 @@ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Registe
|
||||
result = different(prev, incr, addr) ? prev : rscratch2; \
|
||||
\
|
||||
Label retry_load; \
|
||||
prfm(Address(addr), PSTL1STRM); \
|
||||
bind(retry_load); \
|
||||
LDXR(result, addr); \
|
||||
OP(rscratch1, result, incr); \
|
||||
@ -2157,6 +2202,7 @@ void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {
|
||||
result = different(prev, newv, addr) ? prev : rscratch2; \
|
||||
\
|
||||
Label retry_load; \
|
||||
prfm(Address(addr), PSTL1STRM); \
|
||||
bind(retry_load); \
|
||||
LDXR(result, addr); \
|
||||
STXR(rscratch1, newv, addr); \
|
||||
@ -4481,225 +4527,126 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
BLOCK_COMMENT("} string_compare");
|
||||
}
|
||||
|
||||
// Compare Strings or char/byte arrays.
|
||||
|
||||
void MacroAssembler::string_equals(Register str1, Register str2,
|
||||
Register cnt, Register result,
|
||||
Register tmp1) {
|
||||
Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING,
|
||||
NEXT_WORD;
|
||||
// is_string is true iff this is a string comparison.
|
||||
|
||||
const Register tmp2 = rscratch1;
|
||||
assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2);
|
||||
// For Strings we're passed the address of the first characters in a1
|
||||
// and a2 and the length in cnt1.
|
||||
|
||||
BLOCK_COMMENT("string_equals {");
|
||||
// For byte and char arrays we're passed the arrays themselves and we
|
||||
// have to extract length fields and do null checks here.
|
||||
|
||||
// Start by assuming that the strings are not equal.
|
||||
mov(result, zr);
|
||||
// elem_size is the element size in bytes: either 1 or 2.
|
||||
|
||||
// A very short string
|
||||
cmpw(cnt, 4);
|
||||
br(Assembler::LT, SHORT_STRING);
|
||||
// There are two implementations. For arrays >= 8 bytes, all
|
||||
// comparisons (including the final one, which may overlap) are
|
||||
// performed 8 bytes at a time. For arrays < 8 bytes, we compare a
|
||||
// word, then a halfword, and then a byte.
|
||||
|
||||
// Check if the strings start at the same location.
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, SAME_CHARS);
|
||||
void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string)
|
||||
{
|
||||
Label SAME, DONE, SHORT, NEXT_WORD, ONE;
|
||||
Register tmp1 = rscratch1;
|
||||
Register tmp2 = rscratch2;
|
||||
Register cnt2 = tmp2; // cnt2 only used in array length compare
|
||||
int elem_per_word = wordSize/elem_size;
|
||||
int log_elem_size = exact_log2(elem_size);
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset
|
||||
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
|
||||
|
||||
// Compare longwords
|
||||
{
|
||||
subw(cnt, cnt, 4); // The last longword is a special case
|
||||
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
|
||||
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
|
||||
|
||||
// Move both string pointers to the last longword of their
|
||||
// strings, negate the remaining count, and convert it to bytes.
|
||||
lea(str1, Address(str1, cnt, Address::uxtw(1)));
|
||||
lea(str2, Address(str2, cnt, Address::uxtw(1)));
|
||||
sub(cnt, zr, cnt, LSL, 1);
|
||||
BLOCK_COMMENT(is_string ? "string_equals {" : "array_equals {");
|
||||
|
||||
// Loop, loading longwords and comparing them into rscratch2.
|
||||
bind(NEXT_WORD);
|
||||
ldr(tmp1, Address(str1, cnt));
|
||||
ldr(tmp2, Address(str2, cnt));
|
||||
adds(cnt, cnt, wordSize);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DONE);
|
||||
br(Assembler::LT, NEXT_WORD);
|
||||
mov(result, false);
|
||||
|
||||
// Last longword. In the case where length == 4 we compare the
|
||||
// same longword twice, but that's still faster than another
|
||||
// conditional branch.
|
||||
if (!is_string) {
|
||||
// if (a==a2)
|
||||
// return true;
|
||||
eor(rscratch1, a1, a2);
|
||||
cbz(rscratch1, SAME);
|
||||
// if (a==null || a2==null)
|
||||
// return false;
|
||||
cbz(a1, DONE);
|
||||
cbz(a2, DONE);
|
||||
// if (a1.length != a2.length)
|
||||
// return false;
|
||||
ldrw(cnt1, Address(a1, length_offset));
|
||||
ldrw(cnt2, Address(a2, length_offset));
|
||||
eorw(tmp1, cnt1, cnt2);
|
||||
cbnzw(tmp1, DONE);
|
||||
|
||||
ldr(tmp1, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, SAME_CHARS);
|
||||
b(DONE);
|
||||
lea(a1, Address(a1, base_offset));
|
||||
lea(a2, Address(a2, base_offset));
|
||||
}
|
||||
|
||||
bind(SHORT_STRING);
|
||||
// Is the length zero?
|
||||
cbz(cnt, SAME_CHARS);
|
||||
|
||||
bind(SHORT_LOOP);
|
||||
load_unsigned_short(tmp1, Address(post(str1, 2)));
|
||||
load_unsigned_short(tmp2, Address(post(str2, 2)));
|
||||
subw(tmp1, tmp1, tmp2);
|
||||
// Check for short strings, i.e. smaller than wordSize.
|
||||
subs(cnt1, cnt1, elem_per_word);
|
||||
br(Assembler::LT, SHORT);
|
||||
// Main 8 byte comparison loop.
|
||||
bind(NEXT_WORD); {
|
||||
ldr(tmp1, Address(post(a1, wordSize)));
|
||||
ldr(tmp2, Address(post(a2, wordSize)));
|
||||
subs(cnt1, cnt1, elem_per_word);
|
||||
eor(tmp1, tmp1, tmp2);
|
||||
cbnz(tmp1, DONE);
|
||||
} br(GT, NEXT_WORD);
|
||||
// Last longword. In the case where length == 4 we compare the
|
||||
// same longword twice, but that's still faster than another
|
||||
// conditional branch.
|
||||
// cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
|
||||
// length == 4.
|
||||
if (log_elem_size > 0)
|
||||
lsl(cnt1, cnt1, log_elem_size);
|
||||
ldr(tmp1, Address(a1, cnt1));
|
||||
ldr(tmp2, Address(a2, cnt1));
|
||||
eor(tmp1, tmp1, tmp2);
|
||||
cbnz(tmp1, DONE);
|
||||
sub(cnt, cnt, 1);
|
||||
cbnz(cnt, SHORT_LOOP);
|
||||
b(SAME);
|
||||
|
||||
// Strings are equal.
|
||||
bind(SAME_CHARS);
|
||||
bind(SHORT);
|
||||
Label TAIL03, TAIL01;
|
||||
|
||||
tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
|
||||
{
|
||||
ldrw(tmp1, Address(post(a1, 4)));
|
||||
ldrw(tmp2, Address(post(a2, 4)));
|
||||
eorw(tmp1, tmp1, tmp2);
|
||||
cbnzw(tmp1, DONE);
|
||||
}
|
||||
bind(TAIL03);
|
||||
tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
|
||||
{
|
||||
ldrh(tmp1, Address(post(a1, 2)));
|
||||
ldrh(tmp2, Address(post(a2, 2)));
|
||||
eorw(tmp1, tmp1, tmp2);
|
||||
cbnzw(tmp1, DONE);
|
||||
}
|
||||
bind(TAIL01);
|
||||
if (elem_size == 1) { // Only needed when comparing byte arrays.
|
||||
tbz(cnt1, 0, SAME); // 0-1 bytes left.
|
||||
{
|
||||
ldrb(tmp1, a1);
|
||||
ldrb(tmp2, a2);
|
||||
eorw(tmp1, tmp1, tmp2);
|
||||
cbnzw(tmp1, DONE);
|
||||
}
|
||||
}
|
||||
// Arrays are equal.
|
||||
bind(SAME);
|
||||
mov(result, true);
|
||||
|
||||
// That's it
|
||||
// That's it.
|
||||
bind(DONE);
|
||||
|
||||
BLOCK_COMMENT("} string_equals");
|
||||
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
|
||||
}
|
||||
|
||||
|
||||
void MacroAssembler::byte_arrays_equals(Register ary1, Register ary2,
|
||||
Register result, Register tmp1)
|
||||
{
|
||||
Register cnt1 = rscratch1;
|
||||
Register cnt2 = rscratch2;
|
||||
Register tmp2 = rscratch2;
|
||||
|
||||
Label SAME, DIFFER, NEXT, TAIL07, TAIL03, TAIL01;
|
||||
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
|
||||
|
||||
BLOCK_COMMENT("byte_arrays_equals {");
|
||||
|
||||
// different until proven equal
|
||||
mov(result, false);
|
||||
|
||||
// same array?
|
||||
cmp(ary1, ary2);
|
||||
br(Assembler::EQ, SAME);
|
||||
|
||||
// ne if either null
|
||||
cbz(ary1, DIFFER);
|
||||
cbz(ary2, DIFFER);
|
||||
|
||||
// lengths ne?
|
||||
ldrw(cnt1, Address(ary1, length_offset));
|
||||
ldrw(cnt2, Address(ary2, length_offset));
|
||||
cmp(cnt1, cnt2);
|
||||
br(Assembler::NE, DIFFER);
|
||||
|
||||
lea(ary1, Address(ary1, base_offset));
|
||||
lea(ary2, Address(ary2, base_offset));
|
||||
|
||||
subs(cnt1, cnt1, 8);
|
||||
br(LT, TAIL07);
|
||||
|
||||
BIND(NEXT);
|
||||
ldr(tmp1, Address(post(ary1, 8)));
|
||||
ldr(tmp2, Address(post(ary2, 8)));
|
||||
subs(cnt1, cnt1, 8);
|
||||
eor(tmp1, tmp1, tmp2);
|
||||
cbnz(tmp1, DIFFER);
|
||||
br(GE, NEXT);
|
||||
|
||||
BIND(TAIL07); // 0-7 bytes left, cnt1 = #bytes left - 8
|
||||
tst(cnt1, 0b100);
|
||||
br(EQ, TAIL03);
|
||||
ldrw(tmp1, Address(post(ary1, 4)));
|
||||
ldrw(tmp2, Address(post(ary2, 4)));
|
||||
cmp(tmp1, tmp2);
|
||||
br(NE, DIFFER);
|
||||
|
||||
BIND(TAIL03); // 0-3 bytes left, cnt1 = #bytes left - 4
|
||||
tst(cnt1, 0b10);
|
||||
br(EQ, TAIL01);
|
||||
ldrh(tmp1, Address(post(ary1, 2)));
|
||||
ldrh(tmp2, Address(post(ary2, 2)));
|
||||
cmp(tmp1, tmp2);
|
||||
br(NE, DIFFER);
|
||||
BIND(TAIL01); // 0-1 byte left
|
||||
tst(cnt1, 0b01);
|
||||
br(EQ, SAME);
|
||||
ldrb(tmp1, ary1);
|
||||
ldrb(tmp2, ary2);
|
||||
cmp(tmp1, tmp2);
|
||||
br(NE, DIFFER);
|
||||
|
||||
BIND(SAME);
|
||||
mov(result, true);
|
||||
BIND(DIFFER); // result already set
|
||||
|
||||
BLOCK_COMMENT("} byte_arrays_equals");
|
||||
}
|
||||
|
||||
// Compare char[] arrays aligned to 4 bytes
|
||||
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
|
||||
Register result, Register tmp1)
|
||||
{
|
||||
Register cnt1 = rscratch1;
|
||||
Register cnt2 = rscratch2;
|
||||
Register tmp2 = rscratch2;
|
||||
|
||||
Label SAME, DIFFER, NEXT, TAIL03, TAIL01;
|
||||
|
||||
int length_offset = arrayOopDesc::length_offset_in_bytes();
|
||||
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
|
||||
|
||||
BLOCK_COMMENT("char_arrays_equals {");
|
||||
|
||||
// different until proven equal
|
||||
mov(result, false);
|
||||
|
||||
// same array?
|
||||
cmp(ary1, ary2);
|
||||
br(Assembler::EQ, SAME);
|
||||
|
||||
// ne if either null
|
||||
cbz(ary1, DIFFER);
|
||||
cbz(ary2, DIFFER);
|
||||
|
||||
// lengths ne?
|
||||
ldrw(cnt1, Address(ary1, length_offset));
|
||||
ldrw(cnt2, Address(ary2, length_offset));
|
||||
cmp(cnt1, cnt2);
|
||||
br(Assembler::NE, DIFFER);
|
||||
|
||||
lea(ary1, Address(ary1, base_offset));
|
||||
lea(ary2, Address(ary2, base_offset));
|
||||
|
||||
subs(cnt1, cnt1, 4);
|
||||
br(LT, TAIL03);
|
||||
|
||||
BIND(NEXT);
|
||||
ldr(tmp1, Address(post(ary1, 8)));
|
||||
ldr(tmp2, Address(post(ary2, 8)));
|
||||
subs(cnt1, cnt1, 4);
|
||||
eor(tmp1, tmp1, tmp2);
|
||||
cbnz(tmp1, DIFFER);
|
||||
br(GE, NEXT);
|
||||
|
||||
BIND(TAIL03); // 0-3 chars left, cnt1 = #chars left - 4
|
||||
tst(cnt1, 0b10);
|
||||
br(EQ, TAIL01);
|
||||
ldrw(tmp1, Address(post(ary1, 4)));
|
||||
ldrw(tmp2, Address(post(ary2, 4)));
|
||||
cmp(tmp1, tmp2);
|
||||
br(NE, DIFFER);
|
||||
BIND(TAIL01); // 0-1 chars left
|
||||
tst(cnt1, 0b01);
|
||||
br(EQ, SAME);
|
||||
ldrh(tmp1, ary1);
|
||||
ldrh(tmp2, ary2);
|
||||
cmp(tmp1, tmp2);
|
||||
br(NE, DIFFER);
|
||||
|
||||
BIND(SAME);
|
||||
mov(result, true);
|
||||
BIND(DIFFER); // result already set
|
||||
|
||||
BLOCK_COMMENT("} char_arrays_equals");
|
||||
}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
|
||||
@ -971,21 +971,10 @@ public:
|
||||
}
|
||||
|
||||
// A generic CAS; success or failure is in the EQ flag.
|
||||
template <typename T1, typename T2>
|
||||
void cmpxchg(Register addr, Register expected, Register new_val,
|
||||
T1 load_insn,
|
||||
void (MacroAssembler::*cmp_insn)(Register, Register),
|
||||
T2 store_insn,
|
||||
Register tmp = rscratch1) {
|
||||
Label retry_load, done;
|
||||
bind(retry_load);
|
||||
(this->*load_insn)(tmp, addr);
|
||||
(this->*cmp_insn)(tmp, expected);
|
||||
br(Assembler::NE, done);
|
||||
(this->*store_insn)(tmp, new_val, addr);
|
||||
cbnzw(tmp, retry_load);
|
||||
bind(done);
|
||||
}
|
||||
enum operand_size size,
|
||||
bool acquire, bool release,
|
||||
Register tmp = rscratch1);
|
||||
|
||||
// Calls
|
||||
|
||||
@ -1186,13 +1175,11 @@ public:
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1);
|
||||
void string_equals(Register str1, Register str2,
|
||||
Register cnt, Register result,
|
||||
Register tmp1);
|
||||
void char_arrays_equals(Register ary1, Register ary2,
|
||||
Register result, Register tmp1);
|
||||
void byte_arrays_equals(Register ary1, Register ary2,
|
||||
Register result, Register tmp1);
|
||||
|
||||
void arrays_equals(Register a1, Register a2,
|
||||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
|
||||
@ -105,13 +105,20 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
|
||||
inline friend NativeInstruction* nativeInstruction_at(address address);
|
||||
|
||||
static bool is_adrp_at(address instr);
|
||||
|
||||
static bool is_ldr_literal_at(address instr);
|
||||
|
||||
bool is_ldr_literal() {
|
||||
return is_ldr_literal_at(addr_at(0));
|
||||
}
|
||||
|
||||
static bool is_ldrw_to_zr(address instr);
|
||||
|
||||
static bool is_call_at(address instr) {
|
||||
const uint32_t insn = (*(uint32_t*)instr);
|
||||
return (insn >> 26) == 0b100101;
|
||||
}
|
||||
|
||||
bool is_call() {
|
||||
return is_call_at(addr_at(0));
|
||||
}
|
||||
|
||||
@ -107,6 +107,9 @@ CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31));
|
||||
CONSTANT_REGISTER_DECLARATION(Register, zr, (32));
|
||||
CONSTANT_REGISTER_DECLARATION(Register, sp, (33));
|
||||
|
||||
// Used as a filler in instructions where a register field is unused.
|
||||
const Register dummy_reg = r31_sp;
|
||||
|
||||
// Use FloatRegister as shortcut
|
||||
class FloatRegisterImpl;
|
||||
typedef FloatRegisterImpl* FloatRegister;
|
||||
|
||||
@ -163,30 +163,20 @@ class StubGenerator: public StubCodeGenerator {
|
||||
sp_after_call_off = -26,
|
||||
|
||||
d15_off = -26,
|
||||
d14_off = -25,
|
||||
d13_off = -24,
|
||||
d12_off = -23,
|
||||
d11_off = -22,
|
||||
d10_off = -21,
|
||||
d9_off = -20,
|
||||
d8_off = -19,
|
||||
|
||||
r28_off = -18,
|
||||
r27_off = -17,
|
||||
r26_off = -16,
|
||||
r25_off = -15,
|
||||
r24_off = -14,
|
||||
r23_off = -13,
|
||||
r22_off = -12,
|
||||
r21_off = -11,
|
||||
r20_off = -10,
|
||||
r19_off = -9,
|
||||
call_wrapper_off = -8,
|
||||
result_off = -7,
|
||||
result_type_off = -6,
|
||||
method_off = -5,
|
||||
entry_point_off = -4,
|
||||
parameters_off = -3,
|
||||
parameter_size_off = -2,
|
||||
thread_off = -1,
|
||||
fp_f = 0,
|
||||
@ -208,30 +198,20 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Address result_type (rfp, result_type_off * wordSize);
|
||||
const Address method (rfp, method_off * wordSize);
|
||||
const Address entry_point (rfp, entry_point_off * wordSize);
|
||||
const Address parameters (rfp, parameters_off * wordSize);
|
||||
const Address parameter_size(rfp, parameter_size_off * wordSize);
|
||||
|
||||
const Address thread (rfp, thread_off * wordSize);
|
||||
|
||||
const Address d15_save (rfp, d15_off * wordSize);
|
||||
const Address d14_save (rfp, d14_off * wordSize);
|
||||
const Address d13_save (rfp, d13_off * wordSize);
|
||||
const Address d12_save (rfp, d12_off * wordSize);
|
||||
const Address d11_save (rfp, d11_off * wordSize);
|
||||
const Address d10_save (rfp, d10_off * wordSize);
|
||||
const Address d9_save (rfp, d9_off * wordSize);
|
||||
const Address d8_save (rfp, d8_off * wordSize);
|
||||
|
||||
const Address r28_save (rfp, r28_off * wordSize);
|
||||
const Address r27_save (rfp, r27_off * wordSize);
|
||||
const Address r26_save (rfp, r26_off * wordSize);
|
||||
const Address r25_save (rfp, r25_off * wordSize);
|
||||
const Address r24_save (rfp, r24_off * wordSize);
|
||||
const Address r23_save (rfp, r23_off * wordSize);
|
||||
const Address r22_save (rfp, r22_off * wordSize);
|
||||
const Address r21_save (rfp, r21_off * wordSize);
|
||||
const Address r20_save (rfp, r20_off * wordSize);
|
||||
const Address r19_save (rfp, r19_off * wordSize);
|
||||
|
||||
// stub code
|
||||
|
||||
@ -254,31 +234,20 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// rthread because we want to sanity check rthread later
|
||||
__ str(c_rarg7, thread);
|
||||
__ strw(c_rarg6, parameter_size);
|
||||
__ str(c_rarg5, parameters);
|
||||
__ str(c_rarg4, entry_point);
|
||||
__ str(c_rarg3, method);
|
||||
__ str(c_rarg2, result_type);
|
||||
__ str(c_rarg1, result);
|
||||
__ str(c_rarg0, call_wrapper);
|
||||
__ str(r19, r19_save);
|
||||
__ str(r20, r20_save);
|
||||
__ str(r21, r21_save);
|
||||
__ str(r22, r22_save);
|
||||
__ str(r23, r23_save);
|
||||
__ str(r24, r24_save);
|
||||
__ str(r25, r25_save);
|
||||
__ str(r26, r26_save);
|
||||
__ str(r27, r27_save);
|
||||
__ str(r28, r28_save);
|
||||
__ stp(c_rarg4, c_rarg5, entry_point);
|
||||
__ stp(c_rarg2, c_rarg3, result_type);
|
||||
__ stp(c_rarg0, c_rarg1, call_wrapper);
|
||||
|
||||
__ strd(v8, d8_save);
|
||||
__ strd(v9, d9_save);
|
||||
__ strd(v10, d10_save);
|
||||
__ strd(v11, d11_save);
|
||||
__ strd(v12, d12_save);
|
||||
__ strd(v13, d13_save);
|
||||
__ strd(v14, d14_save);
|
||||
__ strd(v15, d15_save);
|
||||
__ stp(r20, r19, r20_save);
|
||||
__ stp(r22, r21, r22_save);
|
||||
__ stp(r24, r23, r24_save);
|
||||
__ stp(r26, r25, r26_save);
|
||||
__ stp(r28, r27, r28_save);
|
||||
|
||||
__ stpd(v9, v8, d9_save);
|
||||
__ stpd(v11, v10, d11_save);
|
||||
__ stpd(v13, v12, d13_save);
|
||||
__ stpd(v15, v14, d15_save);
|
||||
|
||||
// install Java thread in global register now we have saved
|
||||
// whatever value it held
|
||||
@ -385,33 +354,22 @@ class StubGenerator: public StubCodeGenerator {
|
||||
#endif
|
||||
|
||||
// restore callee-save registers
|
||||
__ ldrd(v15, d15_save);
|
||||
__ ldrd(v14, d14_save);
|
||||
__ ldrd(v13, d13_save);
|
||||
__ ldrd(v12, d12_save);
|
||||
__ ldrd(v11, d11_save);
|
||||
__ ldrd(v10, d10_save);
|
||||
__ ldrd(v9, d9_save);
|
||||
__ ldrd(v8, d8_save);
|
||||
__ ldpd(v15, v14, d15_save);
|
||||
__ ldpd(v13, v12, d13_save);
|
||||
__ ldpd(v11, v10, d11_save);
|
||||
__ ldpd(v9, v8, d9_save);
|
||||
|
||||
__ ldr(r28, r28_save);
|
||||
__ ldr(r27, r27_save);
|
||||
__ ldr(r26, r26_save);
|
||||
__ ldr(r25, r25_save);
|
||||
__ ldr(r24, r24_save);
|
||||
__ ldr(r23, r23_save);
|
||||
__ ldr(r22, r22_save);
|
||||
__ ldr(r21, r21_save);
|
||||
__ ldr(r20, r20_save);
|
||||
__ ldr(r19, r19_save);
|
||||
__ ldr(c_rarg0, call_wrapper);
|
||||
__ ldr(c_rarg1, result);
|
||||
__ ldp(r28, r27, r28_save);
|
||||
__ ldp(r26, r25, r26_save);
|
||||
__ ldp(r24, r23, r24_save);
|
||||
__ ldp(r22, r21, r22_save);
|
||||
__ ldp(r20, r19, r20_save);
|
||||
|
||||
__ ldp(c_rarg0, c_rarg1, call_wrapper);
|
||||
__ ldrw(c_rarg2, result_type);
|
||||
__ ldr(c_rarg3, method);
|
||||
__ ldr(c_rarg4, entry_point);
|
||||
__ ldr(c_rarg5, parameters);
|
||||
__ ldr(c_rarg6, parameter_size);
|
||||
__ ldr(c_rarg7, thread);
|
||||
__ ldp(c_rarg4, c_rarg5, entry_point);
|
||||
__ ldp(c_rarg6, c_rarg7, parameter_size);
|
||||
|
||||
#ifndef PRODUCT
|
||||
// tell the simulator we are about to end Java execution
|
||||
@ -771,7 +729,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
//
|
||||
// count is a count of words.
|
||||
//
|
||||
// Precondition: count >= 2
|
||||
// Precondition: count >= 8
|
||||
//
|
||||
// Postconditions:
|
||||
//
|
||||
@ -783,6 +741,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
void generate_copy_longs(Label &start, Register s, Register d, Register count,
|
||||
copy_direction direction) {
|
||||
int unit = wordSize * direction;
|
||||
int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
|
||||
|
||||
int offset;
|
||||
const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
|
||||
@ -792,7 +751,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
|
||||
assert_different_registers(s, d, count, rscratch1);
|
||||
|
||||
Label again, large, small;
|
||||
Label again, drain;
|
||||
const char *stub_name;
|
||||
if (direction == copy_forwards)
|
||||
stub_name = "foward_copy_longs";
|
||||
@ -801,57 +760,35 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
__ align(CodeEntryAlignment);
|
||||
__ bind(start);
|
||||
__ cmp(count, 8);
|
||||
__ br(Assembler::LO, small);
|
||||
if (direction == copy_forwards) {
|
||||
__ sub(s, s, 2 * wordSize);
|
||||
__ sub(d, d, 2 * wordSize);
|
||||
}
|
||||
__ subs(count, count, 16);
|
||||
__ br(Assembler::GE, large);
|
||||
|
||||
// 8 <= count < 16 words. Copy 8.
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ stp(t2, t3, Address(d, 4 * unit));
|
||||
__ stp(t4, t5, Address(d, 6 * unit));
|
||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||
|
||||
if (direction == copy_forwards) {
|
||||
__ add(s, s, 2 * wordSize);
|
||||
__ add(d, d, 2 * wordSize);
|
||||
__ sub(s, s, bias);
|
||||
__ sub(d, d, bias);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Make sure we are never given < 8 words
|
||||
{
|
||||
Label L1, L2;
|
||||
__ bind(small);
|
||||
__ tbz(count, exact_log2(4), L1);
|
||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
__ bind(L1);
|
||||
|
||||
__ tbz(count, 1, L2);
|
||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
__ bind(L2);
|
||||
Label L;
|
||||
__ cmp(count, 8);
|
||||
__ br(Assembler::GE, L);
|
||||
__ stop("genrate_copy_longs called with < 8 words");
|
||||
__ bind(L);
|
||||
}
|
||||
|
||||
__ ret(lr);
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
__ bind(large);
|
||||
#endif
|
||||
|
||||
// Fill 8 registers
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ ldpq(v0, v1, Address(s, 4 * unit));
|
||||
__ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
|
||||
} else {
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||
}
|
||||
|
||||
__ subs(count, count, 16);
|
||||
__ br(Assembler::LO, drain);
|
||||
|
||||
int prefetch = PrefetchCopyIntervalInBytes;
|
||||
bool use_stride = false;
|
||||
@ -866,38 +803,56 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (PrefetchCopyIntervalInBytes > 0)
|
||||
__ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
|
||||
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ stp(t2, t3, Address(d, 4 * unit));
|
||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||
__ stp(t4, t5, Address(d, 6 * unit));
|
||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ stpq(v0, v1, Address(d, 4 * unit));
|
||||
__ ldpq(v0, v1, Address(s, 4 * unit));
|
||||
__ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
|
||||
__ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
|
||||
} else {
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ stp(t2, t3, Address(d, 4 * unit));
|
||||
__ ldp(t2, t3, Address(s, 4 * unit));
|
||||
__ stp(t4, t5, Address(d, 6 * unit));
|
||||
__ ldp(t4, t5, Address(s, 6 * unit));
|
||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
|
||||
}
|
||||
|
||||
__ subs(count, count, 8);
|
||||
__ br(Assembler::HS, again);
|
||||
|
||||
// Drain
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ stp(t2, t3, Address(d, 4 * unit));
|
||||
__ stp(t4, t5, Address(d, 6 * unit));
|
||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||
|
||||
if (direction == copy_forwards) {
|
||||
__ add(s, s, 2 * wordSize);
|
||||
__ add(d, d, 2 * wordSize);
|
||||
__ bind(drain);
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ stpq(v0, v1, Address(d, 4 * unit));
|
||||
__ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
|
||||
} else {
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ stp(t2, t3, Address(d, 4 * unit));
|
||||
__ stp(t4, t5, Address(d, 6 * unit));
|
||||
__ stp(t6, t7, Address(__ pre(d, 8 * unit)));
|
||||
}
|
||||
|
||||
{
|
||||
Label L1, L2;
|
||||
__ tbz(count, exact_log2(4), L1);
|
||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
|
||||
__ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
|
||||
} else {
|
||||
__ ldp(t0, t1, Address(s, 2 * unit));
|
||||
__ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
|
||||
__ stp(t0, t1, Address(d, 2 * unit));
|
||||
__ stp(t2, t3, Address(__ pre(d, 4 * unit)));
|
||||
}
|
||||
__ bind(L1);
|
||||
|
||||
if (direction == copy_forwards) {
|
||||
__ add(s, s, bias);
|
||||
__ add(d, d, bias);
|
||||
}
|
||||
|
||||
__ tbz(count, 1, L2);
|
||||
__ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
|
||||
__ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
|
||||
@ -973,16 +928,135 @@ class StubGenerator: public StubCodeGenerator {
|
||||
int granularity = uabs(step);
|
||||
const Register t0 = r3, t1 = r4;
|
||||
|
||||
// <= 96 bytes do inline. Direction doesn't matter because we always
|
||||
// load all the data before writing anything
|
||||
Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish;
|
||||
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
|
||||
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
|
||||
const Register send = r17, dend = r18;
|
||||
|
||||
if (PrefetchCopyIntervalInBytes > 0)
|
||||
__ prfm(Address(s, 0), PLDL1KEEP);
|
||||
__ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
|
||||
__ br(Assembler::HI, copy_big);
|
||||
|
||||
__ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
|
||||
__ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
|
||||
|
||||
__ cmp(count, 16/granularity);
|
||||
__ br(Assembler::LS, copy16);
|
||||
|
||||
__ cmp(count, 64/granularity);
|
||||
__ br(Assembler::HI, copy80);
|
||||
|
||||
__ cmp(count, 32/granularity);
|
||||
__ br(Assembler::LS, copy32);
|
||||
|
||||
// 33..64 bytes
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ ldpq(v0, v1, Address(s, 0));
|
||||
__ ldpq(v2, v3, Address(send, -32));
|
||||
__ stpq(v0, v1, Address(d, 0));
|
||||
__ stpq(v2, v3, Address(dend, -32));
|
||||
} else {
|
||||
__ ldp(t0, t1, Address(s, 0));
|
||||
__ ldp(t2, t3, Address(s, 16));
|
||||
__ ldp(t4, t5, Address(send, -32));
|
||||
__ ldp(t6, t7, Address(send, -16));
|
||||
|
||||
__ stp(t0, t1, Address(d, 0));
|
||||
__ stp(t2, t3, Address(d, 16));
|
||||
__ stp(t4, t5, Address(dend, -32));
|
||||
__ stp(t6, t7, Address(dend, -16));
|
||||
}
|
||||
__ b(finish);
|
||||
|
||||
// 17..32 bytes
|
||||
__ bind(copy32);
|
||||
__ ldp(t0, t1, Address(s, 0));
|
||||
__ ldp(t2, t3, Address(send, -16));
|
||||
__ stp(t0, t1, Address(d, 0));
|
||||
__ stp(t2, t3, Address(dend, -16));
|
||||
__ b(finish);
|
||||
|
||||
// 65..80/96 bytes
|
||||
// (96 bytes if SIMD because we do 32 bytes per instruction)
|
||||
__ bind(copy80);
|
||||
if (UseSIMDForMemoryOps) {
|
||||
__ ldpq(v0, v1, Address(s, 0));
|
||||
__ ldpq(v2, v3, Address(s, 32));
|
||||
__ ldpq(v4, v5, Address(send, -32));
|
||||
__ stpq(v0, v1, Address(d, 0));
|
||||
__ stpq(v2, v3, Address(d, 32));
|
||||
__ stpq(v4, v5, Address(dend, -32));
|
||||
} else {
|
||||
__ ldp(t0, t1, Address(s, 0));
|
||||
__ ldp(t2, t3, Address(s, 16));
|
||||
__ ldp(t4, t5, Address(s, 32));
|
||||
__ ldp(t6, t7, Address(s, 48));
|
||||
__ ldp(t8, t9, Address(send, -16));
|
||||
|
||||
__ stp(t0, t1, Address(d, 0));
|
||||
__ stp(t2, t3, Address(d, 16));
|
||||
__ stp(t4, t5, Address(d, 32));
|
||||
__ stp(t6, t7, Address(d, 48));
|
||||
__ stp(t8, t9, Address(dend, -16));
|
||||
}
|
||||
__ b(finish);
|
||||
|
||||
// 0..16 bytes
|
||||
__ bind(copy16);
|
||||
__ cmp(count, 8/granularity);
|
||||
__ br(Assembler::LO, copy8);
|
||||
|
||||
// 8..16 bytes
|
||||
__ ldr(t0, Address(s, 0));
|
||||
__ ldr(t1, Address(send, -8));
|
||||
__ str(t0, Address(d, 0));
|
||||
__ str(t1, Address(dend, -8));
|
||||
__ b(finish);
|
||||
|
||||
if (granularity < 8) {
|
||||
// 4..7 bytes
|
||||
__ bind(copy8);
|
||||
__ tbz(count, 2 - exact_log2(granularity), copy4);
|
||||
__ ldrw(t0, Address(s, 0));
|
||||
__ ldrw(t1, Address(send, -4));
|
||||
__ strw(t0, Address(d, 0));
|
||||
__ strw(t1, Address(dend, -4));
|
||||
__ b(finish);
|
||||
if (granularity < 4) {
|
||||
// 0..3 bytes
|
||||
__ bind(copy4);
|
||||
__ cbz(count, finish); // get rid of 0 case
|
||||
if (granularity == 2) {
|
||||
__ ldrh(t0, Address(s, 0));
|
||||
__ strh(t0, Address(d, 0));
|
||||
} else { // granularity == 1
|
||||
// Now 1..3 bytes. Handle the 1 and 2 byte case by copying
|
||||
// the first and last byte.
|
||||
// Handle the 3 byte case by loading and storing base + count/2
|
||||
// (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1))
|
||||
// This does mean that in the 1 byte case we load/store the same
|
||||
// byte 3 times.
|
||||
__ lsr(count, count, 1);
|
||||
__ ldrb(t0, Address(s, 0));
|
||||
__ ldrb(t1, Address(send, -1));
|
||||
__ ldrb(t2, Address(s, count));
|
||||
__ strb(t0, Address(d, 0));
|
||||
__ strb(t1, Address(dend, -1));
|
||||
__ strb(t2, Address(d, count));
|
||||
}
|
||||
__ b(finish);
|
||||
}
|
||||
}
|
||||
|
||||
__ bind(copy_big);
|
||||
if (is_backwards) {
|
||||
__ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
|
||||
__ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
|
||||
}
|
||||
|
||||
Label tail;
|
||||
|
||||
__ cmp(count, 16/granularity);
|
||||
__ br(Assembler::LO, tail);
|
||||
|
||||
// Now we've got the small case out of the way we can align the
|
||||
// source address on a 2-word boundary.
|
||||
|
||||
@ -1028,8 +1102,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
#endif
|
||||
}
|
||||
|
||||
__ cmp(count, 16/granularity);
|
||||
__ br(Assembler::LT, tail);
|
||||
__ bind(aligned);
|
||||
|
||||
// s is now 2-word-aligned.
|
||||
@ -1043,9 +1115,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ bl(copy_b);
|
||||
|
||||
// And the tail.
|
||||
|
||||
__ bind(tail);
|
||||
copy_memory_small(s, d, count, tmp, step);
|
||||
|
||||
if (granularity >= 8) __ bind(copy8);
|
||||
if (granularity >= 4) __ bind(copy4);
|
||||
__ bind(finish);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1984,6 +1984,7 @@ void TemplateInterpreterGenerator::count_bytecode() {
|
||||
__ push(rscratch3);
|
||||
Label L;
|
||||
__ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
|
||||
__ prfm(Address(rscratch2), PSTL1STRM);
|
||||
__ bind(L);
|
||||
__ ldxr(rscratch1, rscratch2);
|
||||
__ add(rscratch1, rscratch1, 1);
|
||||
|
||||
@ -61,6 +61,10 @@
|
||||
#define HWCAP_CRC32 (1<<7)
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP_ATOMICS
|
||||
#define HWCAP_ATOMICS (1<<8)
|
||||
#endif
|
||||
|
||||
int VM_Version::_cpu;
|
||||
int VM_Version::_model;
|
||||
int VM_Version::_model2;
|
||||
@ -172,6 +176,7 @@ void VM_Version::get_processor_features() {
|
||||
if (auxv & HWCAP_AES) strcat(buf, ", aes");
|
||||
if (auxv & HWCAP_SHA1) strcat(buf, ", sha1");
|
||||
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
||||
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
|
||||
|
||||
_features_string = os::strdup(buf);
|
||||
|
||||
@ -191,6 +196,15 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
|
||||
}
|
||||
|
||||
if (auxv & HWCAP_ATOMICS) {
|
||||
if (FLAG_IS_DEFAULT(UseLSE))
|
||||
FLAG_SET_DEFAULT(UseLSE, true);
|
||||
} else {
|
||||
if (UseLSE) {
|
||||
warning("UseLSE specified, but not supported on this CPU");
|
||||
}
|
||||
}
|
||||
|
||||
if (auxv & HWCAP_AES) {
|
||||
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
|
||||
UseAESIntrinsics =
|
||||
|
||||
@ -47,7 +47,7 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
|
||||
// The expected size in bytes of a cache line, used to pad data structures.
|
||||
#define DEFAULT_CACHE_LINE_SIZE 128
|
||||
|
||||
#if defined(COMPILER2) && defined(AIX)
|
||||
#if defined(COMPILER2) && (defined(AIX) || defined(linux))
|
||||
// Include Transactional Memory lock eliding optimization
|
||||
#define INCLUDE_RTM_OPT 1
|
||||
#endif
|
||||
|
||||
@ -76,6 +76,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);
|
||||
|
||||
define_pd_global(bool, CompactStrings, true);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
|
||||
// Platform dependent flag handling: flags only defined on this platform.
|
||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||
\
|
||||
|
||||
@ -2137,8 +2137,6 @@ MachTypeNode *Matcher::make_decode_node() {
|
||||
return decode;
|
||||
}
|
||||
*/
|
||||
// Threshold size for cleararray.
|
||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
||||
|
||||
// false => size gets scaled to BytesPerLong, ok.
|
||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||
|
||||
@ -255,7 +255,16 @@ void VM_Version::initialize() {
|
||||
}
|
||||
#endif
|
||||
#ifdef linux
|
||||
// TODO: check kernel version (we currently have too old versions only)
|
||||
// At least Linux kernel 4.2, as the problematic behavior of syscalls
|
||||
// being called in the middle of a transaction has been addressed.
|
||||
// Please, refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894
|
||||
// in Linux kernel source tree: https://goo.gl/Kc5i7A
|
||||
if (os::Linux::os_version_is_known()) {
|
||||
if (os::Linux::os_version() >= 0x040200)
|
||||
os_too_old = false;
|
||||
} else {
|
||||
vm_exit_during_initialization("RTM can not be enabled: kernel version is unknown.");
|
||||
}
|
||||
#endif
|
||||
if (os_too_old) {
|
||||
vm_exit_during_initialization("RTM is not supported on this OS version.");
|
||||
|
||||
@ -90,6 +90,8 @@ define_pd_global(uintx, TypeProfileLevel, 111);
|
||||
|
||||
define_pd_global(bool, CompactStrings, true);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
|
||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||
\
|
||||
product(intx, UseVIS, 99, \
|
||||
|
||||
@ -948,28 +948,28 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
|
||||
}
|
||||
#endif
|
||||
|
||||
uint instr;
|
||||
instr = (Assembler::ldst_op << 30)
|
||||
| (dst_enc << 25)
|
||||
| (primary << 19)
|
||||
| (src1_enc << 14);
|
||||
uint instr = (Assembler::ldst_op << 30)
|
||||
| (dst_enc << 25)
|
||||
| (primary << 19)
|
||||
| (src1_enc << 14);
|
||||
|
||||
uint index = src2_enc;
|
||||
int disp = disp32;
|
||||
|
||||
if (src1_enc == R_SP_enc || src1_enc == R_FP_enc) {
|
||||
disp += STACK_BIAS;
|
||||
// Quick fix for JDK-8029668: check that stack offset fits, bailout if not
|
||||
// Check that stack offset fits, load into O7 if not
|
||||
if (!Assembler::is_simm13(disp)) {
|
||||
ra->C->record_method_not_compilable("unable to handle large constant offsets");
|
||||
return;
|
||||
MacroAssembler _masm(&cbuf);
|
||||
__ set(disp, O7);
|
||||
if (index != R_G0_enc) {
|
||||
__ add(O7, reg_to_register_object(index), O7);
|
||||
}
|
||||
index = R_O7_enc;
|
||||
disp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// We should have a compiler bailout here rather than a guarantee.
|
||||
// Better yet would be some mechanism to handle variable-size matches correctly.
|
||||
guarantee(Assembler::is_simm13(disp), "Do not match large constant offsets" );
|
||||
|
||||
if( disp == 0 ) {
|
||||
// use reg-reg form
|
||||
// bit 13 is already zero
|
||||
@ -983,7 +983,7 @@ void emit_form3_mem_reg(CodeBuffer &cbuf, PhaseRegAlloc* ra, const MachNode* n,
|
||||
cbuf.insts()->emit_int32(instr);
|
||||
|
||||
#ifdef ASSERT
|
||||
{
|
||||
if (VerifyOops) {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
if (is_verified_oop_base) {
|
||||
__ verify_oop(reg_to_register_object(src1_enc));
|
||||
@ -1342,7 +1342,7 @@ int MachEpilogNode::safepoint_offset() const {
|
||||
// Figure out which register class each belongs in: rc_int, rc_float, rc_stack
|
||||
enum RC { rc_bad, rc_int, rc_float, rc_stack };
|
||||
static enum RC rc_class( OptoReg::Name reg ) {
|
||||
if( !OptoReg::is_valid(reg) ) return rc_bad;
|
||||
if (!OptoReg::is_valid(reg)) return rc_bad;
|
||||
if (OptoReg::is_stack(reg)) return rc_stack;
|
||||
VMReg r = OptoReg::as_VMReg(reg);
|
||||
if (r->is_Register()) return rc_int;
|
||||
@ -1350,66 +1350,79 @@ static enum RC rc_class( OptoReg::Name reg ) {
|
||||
return rc_float;
|
||||
}
|
||||
|
||||
static int impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size, outputStream* st ) {
|
||||
#ifndef PRODUCT
|
||||
// Prints one printf-style formatted entry of the spill-copy listing to 'st'.
//
//   st     - output stream that receives the text
//   format - printf-style format string; the variadic arguments supply its values
//
// If something has already been written (st->position() > 0), st->cr() and
// st->sp() are called first, so callers do not have to emit their own
// separators between consecutive entries.
// NOTE(review): cr()/sp() presumably emit a line break followed by a space - confirm
// against outputStream.
// NOTE(review): ATTRIBUTE_PRINTF(2, 3) presumably enables compiler format checking with
// 'format' as argument 2 and the variadic values starting at argument 3 - confirm.
ATTRIBUTE_PRINTF(2, 3)
|
||||
static void print_helper(outputStream* st, const char* format, ...) {
|
||||
// Only separate from earlier output when the stream is not at position 0.
if (st->position() > 0) {
|
||||
st->cr();
|
||||
st->sp();
|
||||
}
|
||||
// Forward the variadic arguments unchanged to the stream's va_list printer.
va_list ap;
|
||||
va_start(ap, format);
|
||||
st->vprint(format, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
#endif // !PRODUCT
|
||||
|
||||
static void impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool is_load, int offset, int reg, int opcode, const char *op_str, outputStream* st) {
|
||||
if (cbuf) {
|
||||
emit_form3_mem_reg(*cbuf, ra, mach, opcode, -1, R_SP_enc, offset, 0, Matcher::_regEncode[reg]);
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
else if (!do_size) {
|
||||
if (size != 0) st->print("\n\t");
|
||||
if (is_load) st->print("%s [R_SP + #%d],R_%s\t! spill",op_str,offset,OptoReg::regname(reg));
|
||||
else st->print("%s R_%s,[R_SP + #%d]\t! spill",op_str,OptoReg::regname(reg),offset);
|
||||
else {
|
||||
if (is_load) {
|
||||
print_helper(st, "%s [R_SP + #%d],R_%s\t! spill", op_str, offset, OptoReg::regname(reg));
|
||||
} else {
|
||||
print_helper(st, "%s R_%s,[R_SP + #%d]\t! spill", op_str, OptoReg::regname(reg), offset);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return size+4;
|
||||
}
|
||||
|
||||
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int op1, int op2, const char *op_str, int size, outputStream* st ) {
|
||||
if( cbuf ) emit3( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src] );
|
||||
static void impl_mov_helper(CodeBuffer *cbuf, int src, int dst, int op1, int op2, const char *op_str, outputStream* st) {
|
||||
if (cbuf) {
|
||||
emit3(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src]);
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
else if( !do_size ) {
|
||||
if( size != 0 ) st->print("\n\t");
|
||||
st->print("%s R_%s,R_%s\t! spill",op_str,OptoReg::regname(src),OptoReg::regname(dst));
|
||||
else {
|
||||
print_helper(st, "%s R_%s,R_%s\t! spill", op_str, OptoReg::regname(src), OptoReg::regname(dst));
|
||||
}
|
||||
#endif
|
||||
return size+4;
|
||||
}
|
||||
|
||||
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
PhaseRegAlloc *ra_,
|
||||
bool do_size,
|
||||
outputStream* st ) const {
|
||||
static void mach_spill_copy_implementation_helper(const MachNode* mach,
|
||||
CodeBuffer *cbuf,
|
||||
PhaseRegAlloc *ra_,
|
||||
outputStream* st) {
|
||||
// Get registers to move
|
||||
OptoReg::Name src_second = ra_->get_reg_second(in(1));
|
||||
OptoReg::Name src_first = ra_->get_reg_first(in(1));
|
||||
OptoReg::Name dst_second = ra_->get_reg_second(this );
|
||||
OptoReg::Name dst_first = ra_->get_reg_first(this );
|
||||
OptoReg::Name src_second = ra_->get_reg_second(mach->in(1));
|
||||
OptoReg::Name src_first = ra_->get_reg_first(mach->in(1));
|
||||
OptoReg::Name dst_second = ra_->get_reg_second(mach);
|
||||
OptoReg::Name dst_first = ra_->get_reg_first(mach);
|
||||
|
||||
enum RC src_second_rc = rc_class(src_second);
|
||||
enum RC src_first_rc = rc_class(src_first);
|
||||
enum RC src_first_rc = rc_class(src_first);
|
||||
enum RC dst_second_rc = rc_class(dst_second);
|
||||
enum RC dst_first_rc = rc_class(dst_first);
|
||||
enum RC dst_first_rc = rc_class(dst_first);
|
||||
|
||||
assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
|
||||
assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register");
|
||||
|
||||
// Generate spill code!
|
||||
int size = 0;
|
||||
|
||||
if( src_first == dst_first && src_second == dst_second )
|
||||
return size; // Self copy, no move
|
||||
if (src_first == dst_first && src_second == dst_second) {
|
||||
return; // Self copy, no move
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// Check for mem-mem move. Load into unused float registers and fall into
|
||||
// the float-store case.
|
||||
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
|
||||
if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
|
||||
int offset = ra_->reg2offset(src_first);
|
||||
// Further check for aligned-adjacent pair, so we can use a double load
|
||||
if( (src_first&1)==0 && src_first+1 == src_second ) {
|
||||
if ((src_first&1) == 0 && src_first+1 == src_second) {
|
||||
src_second = OptoReg::Name(R_F31_num);
|
||||
src_second_rc = rc_float;
|
||||
size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::lddf_op3,"LDDF",size, st);
|
||||
impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::lddf_op3, "LDDF", st);
|
||||
} else {
|
||||
size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::ldf_op3 ,"LDF ",size, st);
|
||||
impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::ldf_op3, "LDF ", st);
|
||||
}
|
||||
src_first = OptoReg::Name(R_F30_num);
|
||||
src_first_rc = rc_float;
|
||||
@ -1417,7 +1430,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
|
||||
if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) {
|
||||
int offset = ra_->reg2offset(src_second);
|
||||
size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F31_num,Assembler::ldf_op3,"LDF ",size, st);
|
||||
impl_helper(mach, cbuf, ra_, true, offset, R_F31_num, Assembler::ldf_op3, "LDF ", st);
|
||||
src_second = OptoReg::Name(R_F31_num);
|
||||
src_second_rc = rc_float;
|
||||
}
|
||||
@ -1427,36 +1440,38 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS < 3) {
|
||||
int offset = frame::register_save_words*wordSize;
|
||||
if (cbuf) {
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16 );
|
||||
impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
|
||||
impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16 );
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16);
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st);
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16);
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
else if (!do_size) {
|
||||
if (size != 0) st->print("\n\t");
|
||||
st->print( "SUB R_SP,16,R_SP\n");
|
||||
impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
|
||||
impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
|
||||
st->print("\tADD R_SP,16,R_SP\n");
|
||||
else {
|
||||
print_helper(st, "SUB R_SP,16,R_SP");
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st);
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
|
||||
print_helper(st, "ADD R_SP,16,R_SP");
|
||||
}
|
||||
#endif
|
||||
size += 16;
|
||||
}
|
||||
|
||||
// Check for float->int copy on T4
|
||||
if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS >= 3) {
|
||||
// Further check for aligned-adjacent pair, so we can use a double move
|
||||
if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
|
||||
return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mdtox_opf,"MOVDTOX",size, st);
|
||||
size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mstouw_opf,"MOVSTOUW",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mdtox_opf, "MOVDTOX", st);
|
||||
return;
|
||||
}
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mstouw_opf, "MOVSTOUW", st);
|
||||
}
|
||||
// Check for int->float copy on T4
|
||||
if (src_first_rc == rc_int && dst_first_rc == rc_float && UseVIS >= 3) {
|
||||
// Further check for aligned-adjacent pair, so we can use a double move
|
||||
if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
|
||||
return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mxtod_opf,"MOVXTOD",size, st);
|
||||
size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mwtos_opf,"MOVWTOS",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mxtod_opf, "MOVXTOD", st);
|
||||
return;
|
||||
}
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mwtos_opf, "MOVWTOS", st);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
@ -1466,10 +1481,10 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
// there. Misaligned sources only come from native-long-returns (handled
|
||||
// special below).
|
||||
#ifndef _LP64
|
||||
if( src_first_rc == rc_int && // source is already big-endian
|
||||
if (src_first_rc == rc_int && // source is already big-endian
|
||||
src_second_rc != rc_bad && // 64-bit move
|
||||
((dst_first&1)!=0 || dst_second != dst_first+1) ) { // misaligned dst
|
||||
assert( (src_first&1)==0 && src_second == src_first+1, "source must be aligned" );
|
||||
((dst_first & 1) != 0 || dst_second != dst_first + 1)) { // misaligned dst
|
||||
assert((src_first & 1) == 0 && src_second == src_first + 1, "source must be aligned");
|
||||
// Do the big-endian flop.
|
||||
OptoReg::Name tmp = dst_first ; dst_first = dst_second ; dst_second = tmp ;
|
||||
enum RC tmp_rc = dst_first_rc; dst_first_rc = dst_second_rc; dst_second_rc = tmp_rc;
|
||||
@ -1478,30 +1493,28 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
|
||||
// --------------------------------------
|
||||
// Check for integer reg-reg copy
|
||||
if( src_first_rc == rc_int && dst_first_rc == rc_int ) {
|
||||
if (src_first_rc == rc_int && dst_first_rc == rc_int) {
|
||||
#ifndef _LP64
|
||||
if( src_first == R_O0_num && src_second == R_O1_num ) { // Check for the evil O0/O1 native long-return case
|
||||
if (src_first == R_O0_num && src_second == R_O1_num) { // Check for the evil O0/O1 native long-return case
|
||||
// Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value
|
||||
// as stored in memory. On a big-endian machine like SPARC, this means that the _second
|
||||
// operand contains the least significant word of the 64-bit value and vice versa.
|
||||
OptoReg::Name tmp = OptoReg::Name(R_O7_num);
|
||||
assert( (dst_first&1)==0 && dst_second == dst_first+1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" );
|
||||
assert((dst_first & 1) == 0 && dst_second == dst_first + 1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" );
|
||||
// Shift O0 left in-place, zero-extend O1, then OR them into the dst
|
||||
if( cbuf ) {
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020 );
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000 );
|
||||
emit3 ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second] );
|
||||
if ( cbuf ) {
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020);
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000);
|
||||
emit3 (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second]);
|
||||
#ifndef PRODUCT
|
||||
} else if( !do_size ) {
|
||||
if( size != 0 ) st->print("\n\t");
|
||||
st->print("SLLX R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp));
|
||||
st->print("SRL R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second));
|
||||
st->print("OR R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first));
|
||||
} else {
|
||||
print_helper(st, "SLLX R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp));
|
||||
print_helper(st, "SRL R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second));
|
||||
print_helper(st, "OR R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first));
|
||||
#endif
|
||||
}
|
||||
return size+12;
|
||||
}
|
||||
else if( dst_first == R_I0_num && dst_second == R_I1_num ) {
|
||||
return;
|
||||
} else if (dst_first == R_I0_num && dst_second == R_I1_num) {
|
||||
// returning a long value in I0/I1
|
||||
// a SpillCopy must be able to target a return instruction's reg_class
|
||||
// Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value
|
||||
@ -1511,27 +1524,25 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
|
||||
if (src_first == dst_first) {
|
||||
tdest = OptoReg::Name(R_O7_num);
|
||||
size += 4;
|
||||
}
|
||||
|
||||
if( cbuf ) {
|
||||
assert( (src_first&1) == 0 && (src_first+1) == src_second, "return value was in an aligned-adjacent 64-bit reg");
|
||||
if (cbuf) {
|
||||
assert((src_first & 1) == 0 && (src_first + 1) == src_second, "return value was in an aligned-adjacent 64-bit reg");
|
||||
// Shift value in upper 32-bits of src to lower 32-bits of I0; move lower 32-bits to I1
|
||||
// ShrL_reg_imm6
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000 );
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000);
|
||||
// ShrR_reg_imm6 src, 0, dst
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000 );
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000);
|
||||
if (tdest != dst_first) {
|
||||
emit3 ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest] );
|
||||
emit3 (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest]);
|
||||
}
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
else if( !do_size ) {
|
||||
if( size != 0 ) st->print("\n\t"); // %%%%% !!!!!
|
||||
st->print("SRLX R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest));
|
||||
st->print("SRL R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second));
|
||||
else {
|
||||
print_helper(st, "SRLX R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest));
|
||||
print_helper(st, "SRL R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second));
|
||||
if (tdest != dst_first) {
|
||||
st->print("MOV R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first));
|
||||
print_helper(st, "MOV R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first));
|
||||
}
|
||||
}
|
||||
#endif // PRODUCT
|
||||
@ -1539,65 +1550,77 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
}
|
||||
#endif // !_LP64
|
||||
// Else normal reg-reg copy
|
||||
assert( src_second != dst_first, "smashed second before evacuating it" );
|
||||
size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::or_op3,0,"MOV ",size, st);
|
||||
assert( (src_first&1) == 0 && (dst_first&1) == 0, "never move second-halves of int registers" );
|
||||
assert(src_second != dst_first, "smashed second before evacuating it");
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::or_op3, 0, "MOV ", st);
|
||||
assert((src_first & 1) == 0 && (dst_first & 1) == 0, "never move second-halves of int registers");
|
||||
// This moves an aligned adjacent pair.
|
||||
// See if we are done.
|
||||
if( src_first+1 == src_second && dst_first+1 == dst_second )
|
||||
return size;
|
||||
if (src_first + 1 == src_second && dst_first + 1 == dst_second) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for integer store
|
||||
if( src_first_rc == rc_int && dst_first_rc == rc_stack ) {
|
||||
if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
|
||||
int offset = ra_->reg2offset(dst_first);
|
||||
// Further check for aligned-adjacent pair, so we can use a double store
|
||||
if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
|
||||
return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stx_op3,"STX ",size, st);
|
||||
size = impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stw_op3,"STW ",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stx_op3, "STX ", st);
|
||||
return;
|
||||
}
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stw_op3, "STW ", st);
|
||||
}
|
||||
|
||||
// Check for integer load
|
||||
if( dst_first_rc == rc_int && src_first_rc == rc_stack ) {
|
||||
if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
|
||||
int offset = ra_->reg2offset(src_first);
|
||||
// Further check for aligned-adjacent pair, so we can use a double load
|
||||
if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
|
||||
return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldx_op3 ,"LDX ",size, st);
|
||||
size = impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldx_op3, "LDX ", st);
|
||||
return;
|
||||
}
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
|
||||
}
|
||||
|
||||
// Check for float reg-reg copy
|
||||
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
|
||||
if (src_first_rc == rc_float && dst_first_rc == rc_float) {
|
||||
// Further check for aligned-adjacent pair, so we can use a double move
|
||||
if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
|
||||
return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovd_opf,"FMOVD",size, st);
|
||||
size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovs_opf,"FMOVS",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovd_opf, "FMOVD", st);
|
||||
return;
|
||||
}
|
||||
impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovs_opf, "FMOVS", st);
|
||||
}
|
||||
|
||||
// Check for float store
|
||||
if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
|
||||
if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
|
||||
int offset = ra_->reg2offset(dst_first);
|
||||
// Further check for aligned-adjacent pair, so we can use a double store
|
||||
if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
|
||||
return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stdf_op3,"STDF",size, st);
|
||||
size = impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stdf_op3, "STDF", st);
|
||||
return;
|
||||
}
|
||||
impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st);
|
||||
}
|
||||
|
||||
// Check for float load
|
||||
if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
|
||||
if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
|
||||
int offset = ra_->reg2offset(src_first);
|
||||
// Further check for aligned-adjacent pair, so we can use a double load
|
||||
if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
|
||||
return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lddf_op3,"LDDF",size, st);
|
||||
size = impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldf_op3 ,"LDF ",size, st);
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lddf_op3, "LDDF", st);
|
||||
return;
|
||||
}
|
||||
impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldf_op3, "LDF ", st);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Check for hi bits still needing moving. Only happens for misaligned
|
||||
// arguments to native calls.
|
||||
if( src_second == dst_second )
|
||||
return size; // Self copy; no move
|
||||
assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
|
||||
if (src_second == dst_second) {
|
||||
return; // Self copy; no move
|
||||
}
|
||||
assert(src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad");
|
||||
|
||||
#ifndef _LP64
|
||||
// In the LP64 build, all registers can be moved as aligned/adjacent
|
||||
@ -1609,52 +1632,57 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
|
||||
// 32-bits of a 64-bit register, but are needed in low bits of another
|
||||
// register (else it's a hi-bits-to-hi-bits copy which should have
|
||||
// happened already as part of a 64-bit move)
|
||||
if( src_second_rc == rc_int && dst_second_rc == rc_int ) {
|
||||
assert( (src_second&1)==1, "its the evil O0/O1 native return case" );
|
||||
assert( (dst_second&1)==0, "should have moved with 1 64-bit move" );
|
||||
if (src_second_rc == rc_int && dst_second_rc == rc_int) {
|
||||
assert((src_second & 1) == 1, "its the evil O0/O1 native return case");
|
||||
assert((dst_second & 1) == 0, "should have moved with 1 64-bit move");
|
||||
// Shift src_second down to dst_second's low bits.
|
||||
if( cbuf ) {
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
|
||||
if (cbuf) {
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020);
|
||||
#ifndef PRODUCT
|
||||
} else if( !do_size ) {
|
||||
if( size != 0 ) st->print("\n\t");
|
||||
st->print("SRLX R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(dst_second));
|
||||
} else {
|
||||
print_helper(st, "SRLX R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second - 1), OptoReg::regname(dst_second));
|
||||
#endif
|
||||
}
|
||||
return size+4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for high word integer store. Must down-shift the hi bits
|
||||
// into a temp register, then fall into the case of storing int bits.
|
||||
if( src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second&1)==1 ) {
|
||||
if (src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second & 1) == 1) {
|
||||
// Shift src_second down to dst_second's low bits.
|
||||
if( cbuf ) {
|
||||
emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
|
||||
if (cbuf) {
|
||||
emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020);
|
||||
#ifndef PRODUCT
|
||||
} else if( !do_size ) {
|
||||
if( size != 0 ) st->print("\n\t");
|
||||
st->print("SRLX R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(R_O7_num));
|
||||
} else {
|
||||
print_helper(st, "SRLX R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second-1), OptoReg::regname(R_O7_num));
|
||||
#endif
|
||||
}
|
||||
size+=4;
|
||||
src_second = OptoReg::Name(R_O7_num); // Not R_O7H_num!
|
||||
}
|
||||
|
||||
// Check for high word integer load
|
||||
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
|
||||
return impl_helper(this,cbuf,ra_,do_size,true ,ra_->reg2offset(src_second),dst_second,Assembler::lduw_op3,"LDUW",size, st);
|
||||
if (dst_second_rc == rc_int && src_second_rc == rc_stack)
|
||||
return impl_helper(this, cbuf, ra_, true, ra_->reg2offset(src_second), dst_second, Assembler::lduw_op3, "LDUW", size, st);
|
||||
|
||||
// Check for high word integer store
|
||||
if( src_second_rc == rc_int && dst_second_rc == rc_stack )
|
||||
return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stw_op3 ,"STW ",size, st);
|
||||
if (src_second_rc == rc_int && dst_second_rc == rc_stack)
|
||||
return impl_helper(this, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stw_op3, "STW ", size, st);
|
||||
|
||||
// Check for high word float store
|
||||
if( src_second_rc == rc_float && dst_second_rc == rc_stack )
|
||||
return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stf_op3 ,"STF ",size, st);
|
||||
if (src_second_rc == rc_float && dst_second_rc == rc_stack)
|
||||
return impl_helper(this, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stf_op3, "STF ", size, st);
|
||||
|
||||
#endif // !_LP64
|
||||
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// Entry point for emitting or printing a spill copy; a thin wrapper around
// mach_spill_copy_implementation_helper().
//
//   cbuf    - passed through to the helper (the helper emits code when it is non-NULL)
//   ra_     - register allocator, passed through to the helper
//   do_size - must be false; size computation through this method is not supported
//   st      - output stream passed through to the helper for the printed form
//
// Always returns 0: this method no longer reports an instruction size.
uint MachSpillCopyNode::implementation(CodeBuffer *cbuf,
|
||||
PhaseRegAlloc *ra_,
|
||||
bool do_size,
|
||||
outputStream* st) const {
|
||||
// Callers must not request size-only mode from this method.
assert(!do_size, "not supported");
|
||||
// All real work (emit or print) happens in the file-static helper.
mach_spill_copy_implementation_helper(this, cbuf, ra_, st);
|
||||
// The return value carries no size information.
return 0;
|
||||
}
|
||||
|
||||
@ -1669,19 +1697,19 @@ void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
||||
}
|
||||
|
||||
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
|
||||
return implementation( NULL, ra_, true, NULL );
|
||||
return MachNode::size(ra_);
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
#ifndef PRODUCT
|
||||
void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
|
||||
void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const {
|
||||
st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count);
|
||||
}
|
||||
#endif
|
||||
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
|
||||
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
|
||||
MacroAssembler _masm(&cbuf);
|
||||
for(int i = 0; i < _count; i += 1) {
|
||||
for (int i = 0; i < _count; i += 1) {
|
||||
__ nop();
|
||||
}
|
||||
}
|
||||
@ -1952,9 +1980,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
||||
// No scaling for the parameter the ClearArray node.
|
||||
const bool Matcher::init_array_count_is_in_bytes = true;
|
||||
|
||||
// Threshold size for cleararray.
|
||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
||||
|
||||
// No additional cost for CMOVL.
|
||||
const int Matcher::long_cmove_cost() { return 0; }
|
||||
|
||||
@ -5197,7 +5222,6 @@ instruct stkI_to_regF(regF dst, stackSlotI src) %{
|
||||
// No match rule to avoid chain rule match.
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "LDF $src,$dst\t! stkI to regF" %}
|
||||
opcode(Assembler::ldf_op3);
|
||||
ins_encode(simple_form3_mem_reg(src, dst));
|
||||
@ -5208,7 +5232,6 @@ instruct stkL_to_regD(regD dst, stackSlotL src) %{
|
||||
// No match rule to avoid chain rule match.
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "LDDF $src,$dst\t! stkL to regD" %}
|
||||
opcode(Assembler::lddf_op3);
|
||||
ins_encode(simple_form3_mem_reg(src, dst));
|
||||
@ -5219,7 +5242,6 @@ instruct regF_to_stkI(stackSlotI dst, regF src) %{
|
||||
// No match rule to avoid chain rule match.
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STF $src,$dst\t! regF to stkI" %}
|
||||
opcode(Assembler::stf_op3);
|
||||
ins_encode(simple_form3_mem_reg(dst, src));
|
||||
@ -5230,7 +5252,6 @@ instruct regD_to_stkL(stackSlotL dst, regD src) %{
|
||||
// No match rule to avoid chain rule match.
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STDF $src,$dst\t! regD to stkL" %}
|
||||
opcode(Assembler::stdf_op3);
|
||||
ins_encode(simple_form3_mem_reg(dst, src));
|
||||
@ -5240,7 +5261,6 @@ instruct regD_to_stkL(stackSlotL dst, regD src) %{
|
||||
instruct regI_to_stkLHi(stackSlotL dst, iRegI src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST*2);
|
||||
size(8);
|
||||
format %{ "STW $src,$dst.hi\t! long\n\t"
|
||||
"STW R_G0,$dst.lo" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
@ -5252,7 +5272,6 @@ instruct regL_to_stkD(stackSlotD dst, iRegL src) %{
|
||||
// No match rule to avoid chain rule match.
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STX $src,$dst\t! regL to stkD" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -5266,7 +5285,6 @@ instruct stkI_to_regI( iRegI dst, stackSlotI src ) %{
|
||||
match(Set dst src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDUW $src,$dst\t!stk" %}
|
||||
opcode(Assembler::lduw_op3);
|
||||
ins_encode(simple_form3_mem_reg( src, dst ) );
|
||||
@ -5278,7 +5296,6 @@ instruct regI_to_stkI( stackSlotI dst, iRegI src ) %{
|
||||
match(Set dst src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STW $src,$dst\t!stk" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -5290,7 +5307,6 @@ instruct stkL_to_regL( iRegL dst, stackSlotL src ) %{
|
||||
match(Set dst src);
|
||||
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "LDX $src,$dst\t! long" %}
|
||||
opcode(Assembler::ldx_op3);
|
||||
ins_encode(simple_form3_mem_reg( src, dst ) );
|
||||
@ -5302,7 +5318,6 @@ instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
|
||||
match(Set dst src);
|
||||
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STX $src,$dst\t! long" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -5314,7 +5329,6 @@ instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
|
||||
instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{
|
||||
match(Set dst src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "LDX $src,$dst\t!ptr" %}
|
||||
opcode(Assembler::ldx_op3);
|
||||
ins_encode(simple_form3_mem_reg( src, dst ) );
|
||||
@ -5325,7 +5339,6 @@ instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{
|
||||
instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
|
||||
match(Set dst src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STX $src,$dst\t!ptr" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -5771,7 +5784,6 @@ instruct loadL_unaligned(iRegL dst, memory mem, o7RegI tmp) %{
|
||||
match(Set dst (LoadL_unaligned mem));
|
||||
effect(KILL tmp);
|
||||
ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
|
||||
size(16);
|
||||
format %{ "LDUW $mem+4,R_O7\t! misaligned long\n"
|
||||
"\tLDUW $mem ,$dst\n"
|
||||
"\tSLLX #32, $dst, $dst\n"
|
||||
@ -5786,7 +5798,6 @@ instruct loadRange(iRegI dst, memory mem) %{
|
||||
match(Set dst (LoadRange mem));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDUW $mem,$dst\t! range" %}
|
||||
opcode(Assembler::lduw_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, dst ) );
|
||||
@ -5797,7 +5808,6 @@ instruct loadRange(iRegI dst, memory mem) %{
|
||||
instruct loadI_freg(regF dst, memory mem) %{
|
||||
match(Set dst (LoadI mem));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
|
||||
format %{ "LDF $mem,$dst\t! for fitos/fitod" %}
|
||||
opcode(Assembler::ldf_op3);
|
||||
@ -5876,7 +5886,6 @@ instruct loadD(regD dst, memory mem) %{
|
||||
match(Set dst (LoadD mem));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDDF $mem,$dst" %}
|
||||
opcode(Assembler::lddf_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, dst ) );
|
||||
@ -5887,7 +5896,6 @@ instruct loadD(regD dst, memory mem) %{
|
||||
instruct loadD_unaligned(regD_low dst, memory mem ) %{
|
||||
match(Set dst (LoadD_unaligned mem));
|
||||
ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
|
||||
size(8);
|
||||
format %{ "LDF $mem ,$dst.hi\t! misaligned double\n"
|
||||
"\tLDF $mem+4,$dst.lo\t!" %}
|
||||
opcode(Assembler::ldf_op3);
|
||||
@ -5900,7 +5908,6 @@ instruct loadF(regF dst, memory mem) %{
|
||||
match(Set dst (LoadF mem));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDF $mem,$dst" %}
|
||||
opcode(Assembler::ldf_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, dst ) );
|
||||
@ -6119,7 +6126,6 @@ instruct prefetchAlloc( memory mem ) %{
|
||||
predicate(AllocatePrefetchInstr == 0);
|
||||
match( PrefetchAllocation mem );
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
|
||||
format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
|
||||
opcode(Assembler::prefetch_op3);
|
||||
@ -6175,7 +6181,6 @@ instruct storeB(memory mem, iRegI src) %{
|
||||
match(Set mem (StoreB mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STB $src,$mem\t! byte" %}
|
||||
opcode(Assembler::stb_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6186,7 +6191,6 @@ instruct storeB0(memory mem, immI0 src) %{
|
||||
match(Set mem (StoreB mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STB $src,$mem\t! byte" %}
|
||||
opcode(Assembler::stb_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6197,7 +6201,6 @@ instruct storeCM0(memory mem, immI0 src) %{
|
||||
match(Set mem (StoreCM mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STB $src,$mem\t! CMS card-mark byte 0" %}
|
||||
opcode(Assembler::stb_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6209,7 +6212,6 @@ instruct storeC(memory mem, iRegI src) %{
|
||||
match(Set mem (StoreC mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STH $src,$mem\t! short" %}
|
||||
opcode(Assembler::sth_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6220,7 +6222,6 @@ instruct storeC0(memory mem, immI0 src) %{
|
||||
match(Set mem (StoreC mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STH $src,$mem\t! short" %}
|
||||
opcode(Assembler::sth_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6232,7 +6233,6 @@ instruct storeI(memory mem, iRegI src) %{
|
||||
match(Set mem (StoreI mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STW $src,$mem" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6243,7 +6243,6 @@ instruct storeI(memory mem, iRegI src) %{
|
||||
instruct storeL(memory mem, iRegL src) %{
|
||||
match(Set mem (StoreL mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
format %{ "STX $src,$mem\t! long" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6254,7 +6253,6 @@ instruct storeI0(memory mem, immI0 src) %{
|
||||
match(Set mem (StoreI mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STW $src,$mem" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6265,7 +6263,6 @@ instruct storeL0(memory mem, immL0 src) %{
|
||||
match(Set mem (StoreL mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STX $src,$mem" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6277,7 +6274,6 @@ instruct storeI_Freg(memory mem, regF src) %{
|
||||
match(Set mem (StoreI mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STF $src,$mem\t! after fstoi/fdtoi" %}
|
||||
opcode(Assembler::stf_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6288,7 +6284,6 @@ instruct storeI_Freg(memory mem, regF src) %{
|
||||
instruct storeP(memory dst, sp_ptr_RegP src) %{
|
||||
match(Set dst (StoreP dst src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
|
||||
#ifndef _LP64
|
||||
format %{ "STW $src,$dst\t! ptr" %}
|
||||
@ -6304,7 +6299,6 @@ instruct storeP(memory dst, sp_ptr_RegP src) %{
|
||||
instruct storeP0(memory dst, immP0 src) %{
|
||||
match(Set dst (StoreP dst src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
size(4);
|
||||
|
||||
#ifndef _LP64
|
||||
format %{ "STW $src,$dst\t! ptr" %}
|
||||
@ -6379,7 +6373,6 @@ instruct storeD( memory mem, regD src) %{
|
||||
match(Set mem (StoreD mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STDF $src,$mem" %}
|
||||
opcode(Assembler::stdf_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6390,7 +6383,6 @@ instruct storeD0( memory mem, immD0 src) %{
|
||||
match(Set mem (StoreD mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STX $src,$mem" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -6402,7 +6394,6 @@ instruct storeF( memory mem, regF src) %{
|
||||
match(Set mem (StoreF mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STF $src,$mem" %}
|
||||
opcode(Assembler::stf_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, src ) );
|
||||
@ -6413,7 +6404,6 @@ instruct storeF0( memory mem, immF0 src) %{
|
||||
match(Set mem (StoreF mem src));
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STW $src,$mem\t! storeF0" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
|
||||
@ -7068,7 +7058,6 @@ instruct loadPLocked(iRegP dst, memory mem) %{
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
#ifndef _LP64
|
||||
size(4);
|
||||
format %{ "LDUW $mem,$dst\t! ptr" %}
|
||||
opcode(Assembler::lduw_op3, 0, REGP_OP);
|
||||
#else
|
||||
@ -8138,7 +8127,6 @@ instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDUW $src,$dst\t! MoveF2I" %}
|
||||
opcode(Assembler::lduw_op3);
|
||||
ins_encode(simple_form3_mem_reg( src, dst ) );
|
||||
@ -8150,7 +8138,6 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDF $src,$dst\t! MoveI2F" %}
|
||||
opcode(Assembler::ldf_op3);
|
||||
ins_encode(simple_form3_mem_reg(src, dst));
|
||||
@ -8162,7 +8149,6 @@ instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDX $src,$dst\t! MoveD2L" %}
|
||||
opcode(Assembler::ldx_op3);
|
||||
ins_encode(simple_form3_mem_reg( src, dst ) );
|
||||
@ -8174,7 +8160,6 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "LDDF $src,$dst\t! MoveL2D" %}
|
||||
opcode(Assembler::lddf_op3);
|
||||
ins_encode(simple_form3_mem_reg(src, dst));
|
||||
@ -8186,7 +8171,6 @@ instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STF $src,$dst\t! MoveF2I" %}
|
||||
opcode(Assembler::stf_op3);
|
||||
ins_encode(simple_form3_mem_reg(dst, src));
|
||||
@ -8198,7 +8182,6 @@ instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STW $src,$dst\t! MoveI2F" %}
|
||||
opcode(Assembler::stw_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -8210,7 +8193,6 @@ instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STDF $src,$dst\t! MoveD2L" %}
|
||||
opcode(Assembler::stdf_op3);
|
||||
ins_encode(simple_form3_mem_reg(dst, src));
|
||||
@ -8222,7 +8204,6 @@ instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
|
||||
effect(DEF dst, USE src);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
size(4);
|
||||
format %{ "STX $src,$dst\t! MoveL2D" %}
|
||||
opcode(Assembler::stx_op3);
|
||||
ins_encode(simple_form3_mem_reg( dst, src ) );
|
||||
@ -8427,7 +8408,6 @@ instruct convI2D_reg(regD_low dst, iRegI src) %{
|
||||
instruct convI2D_mem(regD_low dst, memory mem) %{
|
||||
match(Set dst (ConvI2D (LoadI mem)));
|
||||
ins_cost(DEFAULT_COST + MEMORY_REF_COST);
|
||||
size(8);
|
||||
format %{ "LDF $mem,$dst\n\t"
|
||||
"FITOD $dst,$dst" %}
|
||||
opcode(Assembler::ldf_op3, Assembler::fitod_opf);
|
||||
@ -8468,7 +8448,6 @@ instruct convI2F_reg(regF dst, iRegI src) %{
|
||||
instruct convI2F_mem( regF dst, memory mem ) %{
|
||||
match(Set dst (ConvI2F (LoadI mem)));
|
||||
ins_cost(DEFAULT_COST + MEMORY_REF_COST);
|
||||
size(8);
|
||||
format %{ "LDF $mem,$dst\n\t"
|
||||
"FITOS $dst,$dst" %}
|
||||
opcode(Assembler::ldf_op3, Assembler::fitos_opf);
|
||||
|
||||
@ -463,3 +463,37 @@ unsigned int VM_Version::calc_parallel_worker_threads() {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
int VM_Version::parse_features(const char* implementation) {
|
||||
int features = unknown_m;
|
||||
// Convert to UPPER case before compare.
|
||||
char* impl = os::strdup_check_oom(implementation);
|
||||
|
||||
for (int i = 0; impl[i] != 0; i++)
|
||||
impl[i] = (char)toupper((uint)impl[i]);
|
||||
|
||||
if (strstr(impl, "SPARC64") != NULL) {
|
||||
features |= sparc64_family_m;
|
||||
} else if (strstr(impl, "SPARC-M") != NULL) {
|
||||
// M-series SPARC is based on T-series.
|
||||
features |= (M_family_m | T_family_m);
|
||||
} else if (strstr(impl, "SPARC-T") != NULL) {
|
||||
features |= T_family_m;
|
||||
if (strstr(impl, "SPARC-T1") != NULL) {
|
||||
features |= T1_model_m;
|
||||
}
|
||||
} else {
|
||||
if (strstr(impl, "SPARC") == NULL) {
|
||||
#ifndef PRODUCT
|
||||
// kstat on Solaris 8 virtual machines (branded zones)
|
||||
// returns "(unsupported)" implementation. Solaris 8 is not
|
||||
// supported anymore, but include this check to be on the
|
||||
// safe side.
|
||||
warning("Can't parse CPU implementation = '%s', assume generic SPARC", impl);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
os::free((void*)impl);
|
||||
return features;
|
||||
}
|
||||
|
||||
@ -121,7 +121,7 @@ protected:
|
||||
static bool is_T1_model(int features) { return is_T_family(features) && ((features & T1_model_m) != 0); }
|
||||
|
||||
static int maximum_niagara1_processor_count() { return 32; }
|
||||
|
||||
static int parse_features(const char* implementation);
|
||||
public:
|
||||
// Initialization
|
||||
static void initialize();
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -777,6 +777,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
||||
case 0x6E: // movd
|
||||
case 0x7E: // movd
|
||||
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
|
||||
case 0xFE: // paddd
|
||||
debug_only(has_disp32 = true);
|
||||
break;
|
||||
|
||||
@ -926,6 +927,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
|
||||
ip++; // skip P2, move to opcode
|
||||
// To find the end of instruction (which == end_pc_operand).
|
||||
switch (0xFF & *ip) {
|
||||
case 0x22: // pinsrd r, r/a, #8
|
||||
case 0x61: // pcmpestri r, r/a, #8
|
||||
case 0x70: // pshufd r, r/a, #8
|
||||
case 0x73: // psrldq r, #8
|
||||
@ -3953,6 +3955,83 @@ void Assembler::setb(Condition cc, Register dst) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_ssse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8((unsigned char)0x0F);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_sse4_1(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8((unsigned char)0x0E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8((unsigned char)0xCC);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
emit_int8((unsigned char)imm8);
|
||||
}
|
||||
|
||||
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xC8);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xC9);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xCA);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// xmm0 is implicit additional source to this instruction.
|
||||
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xCB);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xCC);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
|
||||
assert(VM_Version::supports_sha(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8((unsigned char)0xCD);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
|
||||
void Assembler::shll(Register dst, int imm8) {
|
||||
assert(isShiftCount(imm8), "illegal shift count");
|
||||
int encode = prefix_and_encode(dst->encoding());
|
||||
@ -4931,6 +5010,15 @@ void Assembler::paddd(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::paddd(XMMRegister dst, Address src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFE);
|
||||
emit_operand(dst, src);
|
||||
}
|
||||
|
||||
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
@ -5611,8 +5699,9 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
||||
}
|
||||
|
||||
|
||||
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
@ -5621,11 +5710,12 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
||||
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -5633,26 +5723,29 @@ void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 256 bits
|
||||
// 0x01 - insert into upper 256 bits
|
||||
emit_int8(value & 0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
|
||||
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x1A);
|
||||
emit_operand(dst, src);
|
||||
// 0x00 - insert into lower 256 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(value & 0x01);
|
||||
// 0x01 - insert into upper 256 bits
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
||||
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -5662,57 +5755,64 @@ void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
// 0x01 - insert into q1 128 bits (128..255)
|
||||
// 0x02 - insert into q2 128 bits (256..383)
|
||||
// 0x03 - insert into q3 128 bits (384..511)
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
|
||||
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x18);
|
||||
emit_operand(dst, src);
|
||||
// 0x00 - insert into q0 128 bits (0..127)
|
||||
// 0x01 - insert into q1 128 bits (128..255)
|
||||
// 0x02 - insert into q2 128 bits (256..383)
|
||||
// 0x03 - insert into q3 128 bits (384..511)
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
void Assembler::vinsertf128h(XMMRegister dst, Address src) {
|
||||
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x18);
|
||||
emit_operand(dst, src);
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
// 0x00 - extract from lower 128 bits
|
||||
// 0x01 - extract from upper 128 bits
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextractf128h(Address dst, XMMRegister src) {
|
||||
void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(src != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
@ -5720,12 +5820,14 @@ void Assembler::vextractf128h(Address dst, XMMRegister src) {
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_operand(src, dst);
|
||||
// 0x00 - extract from lower 128 bits
|
||||
// 0x01 - extract from upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
@ -5734,11 +5836,12 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
|
||||
void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -5746,39 +5849,44 @@ void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 256 bits
|
||||
// 0x01 - insert into upper 256 bits
|
||||
emit_int8(value & 0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vinserti128h(XMMRegister dst, Address src) {
|
||||
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x38);
|
||||
emit_operand(dst, src);
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x39);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - insert into lower 128 bits
|
||||
// 0x01 - insert into upper 128 bits
|
||||
emit_int8(0x01);
|
||||
// 0x00 - extract from lower 128 bits
|
||||
// 0x01 - extract from upper 128 bits
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextracti128h(Address dst, XMMRegister src) {
|
||||
void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(src != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
@ -5786,47 +5894,53 @@ void Assembler::vextracti128h(Address dst, XMMRegister src) {
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x39);
|
||||
emit_operand(src, dst);
|
||||
// 0x00 - extract from lower 128 bits
|
||||
// 0x01 - extract from upper 128 bits
|
||||
emit_int8(0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
|
||||
void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x3B);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - extract from lower 256 bits
|
||||
// 0x01 - extract from upper 256 bits
|
||||
emit_int8(value & 0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
|
||||
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x39);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - extract from bits 127:0
|
||||
// 0x01 - extract from bits 255:128
|
||||
// 0x02 - extract from bits 383:256
|
||||
// 0x03 - extract from bits 511:384
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
|
||||
void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x1B);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - extract from lower 256 bits
|
||||
// 0x01 - extract from upper 256 bits
|
||||
emit_int8(value & 0x1);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
|
||||
void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(src != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x01, "imm8: %u", imm8);
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
|
||||
@ -5835,11 +5949,12 @@ void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
|
||||
emit_operand(src, dst);
|
||||
// 0x00 - extract from lower 256 bits
|
||||
// 0x01 - extract from upper 256 bits
|
||||
emit_int8(value & 0x01);
|
||||
emit_int8(imm8 & 0x01);
|
||||
}
|
||||
|
||||
void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
|
||||
void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -5849,12 +5964,13 @@ void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
|
||||
// 0x01 - extract from bits 255:128
|
||||
// 0x02 - extract from bits 383:256
|
||||
// 0x03 - extract from bits 511:384
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
|
||||
void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(src != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
|
||||
@ -5865,19 +5981,21 @@ void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
|
||||
// 0x01 - extract from bits 255:128
|
||||
// 0x02 - extract from bits 383:256
|
||||
// 0x03 - extract from bits 511:384
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
|
||||
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
// 0x00 - extract from bits 127:0
|
||||
// 0x01 - extract from bits 255:128
|
||||
// 0x02 - extract from bits 383:256
|
||||
// 0x03 - extract from bits 511:384
|
||||
emit_int8(value & 0x3);
|
||||
emit_int8(imm8 & 0x03);
|
||||
}
|
||||
|
||||
// duplicate 4-bytes integer data from src into 8 locations in dest
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1672,6 +1672,18 @@ private:
|
||||
|
||||
void setb(Condition cc, Register dst);
|
||||
|
||||
void palignr(XMMRegister dst, XMMRegister src, int imm8);
|
||||
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
|
||||
|
||||
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
|
||||
void sha1nexte(XMMRegister dst, XMMRegister src);
|
||||
void sha1msg1(XMMRegister dst, XMMRegister src);
|
||||
void sha1msg2(XMMRegister dst, XMMRegister src);
|
||||
// xmm0 is implicit additional source to the following instruction.
|
||||
void sha256rnds2(XMMRegister dst, XMMRegister src);
|
||||
void sha256msg1(XMMRegister dst, XMMRegister src);
|
||||
void sha256msg2(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void shldl(Register dst, Register src);
|
||||
void shldl(Register dst, Register src, int8_t imm8);
|
||||
|
||||
@ -1868,6 +1880,7 @@ private:
|
||||
void paddb(XMMRegister dst, XMMRegister src);
|
||||
void paddw(XMMRegister dst, XMMRegister src);
|
||||
void paddd(XMMRegister dst, XMMRegister src);
|
||||
void paddd(XMMRegister dst, Address src);
|
||||
void paddq(XMMRegister dst, XMMRegister src);
|
||||
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -1958,33 +1971,31 @@ private:
|
||||
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
// Copy low 128bit into high 128bit of YMM registers.
|
||||
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vextractf128h(XMMRegister dst, XMMRegister src);
|
||||
void vextracti128h(XMMRegister dst, XMMRegister src);
|
||||
// 128bit copy from/to 256bit (YMM) vector registers
|
||||
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||
void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
|
||||
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
|
||||
|
||||
// Load/store high 128bit of YMM registers which does not destroy other half.
|
||||
void vinsertf128h(XMMRegister dst, Address src);
|
||||
void vinserti128h(XMMRegister dst, Address src);
|
||||
void vextractf128h(Address dst, XMMRegister src);
|
||||
void vextracti128h(Address dst, XMMRegister src);
|
||||
// 256bit copy from/to 512bit (ZMM) vector registers
|
||||
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
|
||||
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||
|
||||
// Copy low 256bit into high 256bit of ZMM registers.
|
||||
void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
||||
void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
||||
void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf64x4h(Address dst, XMMRegister src, int value);
|
||||
void vinsertf64x4h(XMMRegister dst, Address src, int value);
|
||||
|
||||
// Copy targeted 128bit segments of the ZMM registers
|
||||
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
|
||||
void vextractf32x4h(Address dst, XMMRegister src, int value);
|
||||
void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
|
||||
void vinsertf32x4h(XMMRegister dst, Address src, int value);
|
||||
// 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers
|
||||
void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
|
||||
void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
|
||||
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||
|
||||
// duplicate 4-bytes integer data from src into 8 locations in dest
|
||||
void vpbroadcastd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
@ -97,6 +97,8 @@ define_pd_global(bool, CompactStrings, true);
|
||||
|
||||
define_pd_global(bool, PreserveFramePointer, false);
|
||||
|
||||
define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
||||
|
||||
#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
|
||||
\
|
||||
develop(bool, IEEEPrecision, true, \
|
||||
|
||||
@ -3445,7 +3445,7 @@ void MacroAssembler::movptr(Address dst, Register src) {
|
||||
|
||||
void MacroAssembler::movdqu(Address dst, XMMRegister src) {
|
||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
||||
Assembler::vextractf32x4h(dst, src, 0);
|
||||
Assembler::vextractf32x4(dst, src, 0);
|
||||
} else {
|
||||
Assembler::movdqu(dst, src);
|
||||
}
|
||||
@ -3453,7 +3453,7 @@ void MacroAssembler::movdqu(Address dst, XMMRegister src) {
|
||||
|
||||
void MacroAssembler::movdqu(XMMRegister dst, Address src) {
|
||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
||||
Assembler::vinsertf32x4h(dst, src, 0);
|
||||
Assembler::vinsertf32x4(dst, dst, src, 0);
|
||||
} else {
|
||||
Assembler::movdqu(dst, src);
|
||||
}
|
||||
@ -3478,7 +3478,7 @@ void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
|
||||
|
||||
void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
|
||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
|
||||
Assembler::vextractf64x4h(dst, src, 0);
|
||||
vextractf64x4_low(dst, src);
|
||||
} else {
|
||||
Assembler::vmovdqu(dst, src);
|
||||
}
|
||||
@ -3486,7 +3486,7 @@ void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
|
||||
|
||||
void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
|
||||
if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
|
||||
Assembler::vinsertf64x4h(dst, src, 0);
|
||||
vinsertf64x4_low(dst, src);
|
||||
} else {
|
||||
Assembler::vmovdqu(dst, src);
|
||||
}
|
||||
@ -5649,14 +5649,14 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
// Save upper half of ZMM registers
|
||||
subptr(rsp, 32*num_xmm_regs);
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
|
||||
vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
|
||||
}
|
||||
}
|
||||
assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
|
||||
// Save upper half of YMM registers
|
||||
subptr(rsp, 16*num_xmm_regs);
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
|
||||
vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -5665,7 +5665,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
#ifdef _LP64
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
|
||||
vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
@ -5753,7 +5753,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
#ifdef _LP64
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
|
||||
vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
@ -5771,12 +5771,12 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
|
||||
if (MaxVectorSize > 16) {
|
||||
// Restore upper half of YMM registers.
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
|
||||
vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
|
||||
}
|
||||
addptr(rsp, 16*num_xmm_regs);
|
||||
if(UseAVX > 2) {
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
|
||||
vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
|
||||
}
|
||||
addptr(rsp, 32*num_xmm_regs);
|
||||
}
|
||||
@ -7198,21 +7198,50 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
|
||||
|
||||
}
|
||||
|
||||
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
|
||||
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) {
|
||||
// cnt - number of qwords (8-byte words).
|
||||
// base - start address, qword aligned.
|
||||
// is_large - if optimizers know cnt is larger than InitArrayShortSize
|
||||
assert(base==rdi, "base register must be edi for rep stos");
|
||||
assert(tmp==rax, "tmp register must be eax for rep stos");
|
||||
assert(cnt==rcx, "cnt register must be ecx for rep stos");
|
||||
assert(InitArrayShortSize % BytesPerLong == 0,
|
||||
"InitArrayShortSize should be the multiple of BytesPerLong");
|
||||
|
||||
Label DONE;
|
||||
|
||||
xorptr(tmp, tmp);
|
||||
|
||||
if (!is_large) {
|
||||
Label LOOP, LONG;
|
||||
cmpptr(cnt, InitArrayShortSize/BytesPerLong);
|
||||
jccb(Assembler::greater, LONG);
|
||||
|
||||
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
|
||||
|
||||
decrement(cnt);
|
||||
jccb(Assembler::negative, DONE); // Zero length
|
||||
|
||||
// Use individual pointer-sized stores for small counts:
|
||||
BIND(LOOP);
|
||||
movptr(Address(base, cnt, Address::times_ptr), tmp);
|
||||
decrement(cnt);
|
||||
jccb(Assembler::greaterEqual, LOOP);
|
||||
jmpb(DONE);
|
||||
|
||||
BIND(LONG);
|
||||
}
|
||||
|
||||
// Use longer rep-prefixed ops for non-small counts:
|
||||
if (UseFastStosb) {
|
||||
shlptr(cnt,3); // convert to number of bytes
|
||||
shlptr(cnt, 3); // convert to number of bytes
|
||||
rep_stosb();
|
||||
} else {
|
||||
NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
|
||||
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
|
||||
rep_stos();
|
||||
}
|
||||
|
||||
BIND(DONE);
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -48,7 +48,6 @@ class MacroAssembler: public Assembler {
|
||||
// This is the base routine called by the different versions of call_VM_leaf. The interpreter
|
||||
// may customize this version by overriding it for its purposes (e.g., to save/restore
|
||||
// additional registers when doing a VM call).
|
||||
#define COMMA ,
|
||||
|
||||
virtual void call_VM_leaf_base(
|
||||
address entry_point, // the entry point
|
||||
@ -903,35 +902,66 @@ class MacroAssembler: public Assembler {
|
||||
void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
|
||||
void ldmxcsr(AddressLiteral src);
|
||||
|
||||
void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
|
||||
XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||
bool multi_block);
|
||||
|
||||
#ifdef _LP64
|
||||
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||
bool multi_block, XMMRegister shuf_mask);
|
||||
#else
|
||||
void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||
bool multi_block);
|
||||
#endif
|
||||
|
||||
void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
|
||||
#ifdef _LP64
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2));
|
||||
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx NOT_LP64(COMMA Register tmp) LP64_ONLY(COMMA Register tmp1)
|
||||
LP64_ONLY(COMMA Register tmp2) LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
|
||||
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||
|
||||
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rbx LP64_ONLY(COMMA Register rcx), Register rdx
|
||||
LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2)
|
||||
LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
|
||||
Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4);
|
||||
|
||||
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp)
|
||||
LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9)
|
||||
LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11));
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
#else
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp);
|
||||
|
||||
void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rbx, Register rdx);
|
||||
|
||||
void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
|
||||
#ifndef _LP64
|
||||
void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
||||
Register edx, Register ebx, Register esi, Register edi,
|
||||
Register ebp, Register esp);
|
||||
|
||||
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
||||
Register esi, Register edi, Register ebp, Register esp);
|
||||
#endif
|
||||
@ -1185,14 +1215,131 @@ public:
|
||||
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
|
||||
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
|
||||
|
||||
// Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
|
||||
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
if (UseAVX > 1) // vinserti128h is available only in AVX2
|
||||
Assembler::vinserti128h(dst, nds, src);
|
||||
else
|
||||
Assembler::vinsertf128h(dst, nds, src);
|
||||
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
if (UseAVX > 1) { // vinserti128 is available only in AVX2
|
||||
Assembler::vinserti128(dst, nds, src, imm8);
|
||||
} else {
|
||||
Assembler::vinsertf128(dst, nds, src, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
if (UseAVX > 1) { // vinserti128 is available only in AVX2
|
||||
Assembler::vinserti128(dst, nds, src, imm8);
|
||||
} else {
|
||||
Assembler::vinsertf128(dst, nds, src, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
if (UseAVX > 1) { // vextracti128 is available only in AVX2
|
||||
Assembler::vextracti128(dst, src, imm8);
|
||||
} else {
|
||||
Assembler::vextractf128(dst, src, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
if (UseAVX > 1) { // vextracti128 is available only in AVX2
|
||||
Assembler::vextracti128(dst, src, imm8);
|
||||
} else {
|
||||
Assembler::vextractf128(dst, src, imm8);
|
||||
}
|
||||
}
|
||||
|
||||
// 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
|
||||
void vinserti128_high(XMMRegister dst, XMMRegister src) {
|
||||
vinserti128(dst, dst, src, 1);
|
||||
}
|
||||
void vinserti128_high(XMMRegister dst, Address src) {
|
||||
vinserti128(dst, dst, src, 1);
|
||||
}
|
||||
void vextracti128_high(XMMRegister dst, XMMRegister src) {
|
||||
vextracti128(dst, src, 1);
|
||||
}
|
||||
void vextracti128_high(Address dst, XMMRegister src) {
|
||||
vextracti128(dst, src, 1);
|
||||
}
|
||||
void vinsertf128_high(XMMRegister dst, XMMRegister src) {
|
||||
vinsertf128(dst, dst, src, 1);
|
||||
}
|
||||
void vinsertf128_high(XMMRegister dst, Address src) {
|
||||
vinsertf128(dst, dst, src, 1);
|
||||
}
|
||||
void vextractf128_high(XMMRegister dst, XMMRegister src) {
|
||||
vextractf128(dst, src, 1);
|
||||
}
|
||||
void vextractf128_high(Address dst, XMMRegister src) {
|
||||
vextractf128(dst, src, 1);
|
||||
}
|
||||
|
||||
// 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
|
||||
void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
|
||||
vinserti64x4(dst, dst, src, 1);
|
||||
}
|
||||
void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
|
||||
vinsertf64x4(dst, dst, src, 1);
|
||||
}
|
||||
void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
|
||||
vextracti64x4(dst, src, 1);
|
||||
}
|
||||
void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
|
||||
vextractf64x4(dst, src, 1);
|
||||
}
|
||||
void vextractf64x4_high(Address dst, XMMRegister src) {
|
||||
vextractf64x4(dst, src, 1);
|
||||
}
|
||||
void vinsertf64x4_high(XMMRegister dst, Address src) {
|
||||
vinsertf64x4(dst, dst, src, 1);
|
||||
}
|
||||
|
||||
// 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
|
||||
void vinserti128_low(XMMRegister dst, XMMRegister src) {
|
||||
vinserti128(dst, dst, src, 0);
|
||||
}
|
||||
void vinserti128_low(XMMRegister dst, Address src) {
|
||||
vinserti128(dst, dst, src, 0);
|
||||
}
|
||||
void vextracti128_low(XMMRegister dst, XMMRegister src) {
|
||||
vextracti128(dst, src, 0);
|
||||
}
|
||||
void vextracti128_low(Address dst, XMMRegister src) {
|
||||
vextracti128(dst, src, 0);
|
||||
}
|
||||
void vinsertf128_low(XMMRegister dst, XMMRegister src) {
|
||||
vinsertf128(dst, dst, src, 0);
|
||||
}
|
||||
void vinsertf128_low(XMMRegister dst, Address src) {
|
||||
vinsertf128(dst, dst, src, 0);
|
||||
}
|
||||
void vextractf128_low(XMMRegister dst, XMMRegister src) {
|
||||
vextractf128(dst, src, 0);
|
||||
}
|
||||
void vextractf128_low(Address dst, XMMRegister src) {
|
||||
vextractf128(dst, src, 0);
|
||||
}
|
||||
|
||||
// 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
|
||||
void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
|
||||
vinserti64x4(dst, dst, src, 0);
|
||||
}
|
||||
void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
|
||||
vinsertf64x4(dst, dst, src, 0);
|
||||
}
|
||||
void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
|
||||
vextracti64x4(dst, src, 0);
|
||||
}
|
||||
void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
|
||||
vextractf64x4(dst, src, 0);
|
||||
}
|
||||
void vextractf64x4_low(Address dst, XMMRegister src) {
|
||||
vextractf64x4(dst, src, 0);
|
||||
}
|
||||
void vinsertf64x4_low(XMMRegister dst, Address src) {
|
||||
vinsertf64x4(dst, dst, src, 0);
|
||||
}
|
||||
|
||||
|
||||
// Carry-Less Multiplication Quadword
|
||||
void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
// 0x00 - multiply lower 64 bits [0:63]
|
||||
@ -1284,8 +1431,9 @@ public:
|
||||
// C2 compiled method's prolog code.
|
||||
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
|
||||
|
||||
// clear memory of size 'cnt' qwords, starting at 'base'.
|
||||
void clear_mem(Register base, Register cnt, Register rtmp);
|
||||
// clear memory of size 'cnt' qwords, starting at 'base';
|
||||
// if 'is_large' is set, do not try to produce short loop
|
||||
void clear_mem(Register base, Register cnt, Register rtmp, bool is_large);
|
||||
|
||||
#ifdef COMPILER2
|
||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||
|
||||
495
hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp
Normal file
@ -0,0 +1,495 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
// Emits a SHA-1 compression sequence built on sha1rnds4 / sha1nexte /
// sha1msg1 / sha1msg2.
//
//   abcd, e0, e1   working registers for the hash state
//   msg0..msg3     message-schedule registers
//   shuf_mask      first holds the upper-word mask, then the byte-flip
//                  shuffle mask for the rest of the sequence
//   buf            address of the current input block; 64 bytes are read per
//                  iteration (offsets 0, 16, 32, 48)
//   state          address of the hash state; 16 bytes at offset 0 and
//                  4 bytes at offset 16 are read on entry and written on exit
//   ofs, limit     only used when multi_block is true
//   rsp            base of a 32-byte scratch area (two 16-byte slots).  The
//                  parameter is literally named rsp, so inside this function
//                  the identifier refers to the argument.
//   multi_block    when true, buf and ofs advance by 64 after each block, the
//                  sequence repeats while ofs <= limit (unsigned compare), and
//                  the final ofs is left in rax.
//
// The labels 'start' and 'done_hash' of the previous version were bound but
// never used as jump targets, so they have been dropped; the emitted
// instruction stream is unchanged.
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
                               XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
                               Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {

  Label loop0;

  address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
  address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();

  // Scratch slots holding the state as it was at the top of the current block.
  const Address saved_e0(rsp, 0);
  const Address saved_abcd(rsp, 16);

  movdqu(abcd, Address(state, 0));
  pinsrd(e0, Address(state, 16), 3);
  movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
  pand(e0, shuf_mask);
  pshufd(abcd, abcd, 0x1B);
  movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f

  bind(loop0);
  // Save hash values for addition after rounds
  movdqu(saved_e0, e0);
  movdqu(saved_abcd, abcd);


  // Rounds 0 - 3
  movdqu(msg0, Address(buf, 0));
  pshufb(msg0, shuf_mask);
  paddd(e0, msg0);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);

  // Rounds 4 - 7
  movdqu(msg1, Address(buf, 16));
  pshufb(msg1, shuf_mask);
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg0, msg1);

  // Rounds 8 - 11
  movdqu(msg2, Address(buf, 32));
  pshufb(msg2, shuf_mask);
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 12 - 15
  movdqu(msg3, Address(buf, 48));
  pshufb(msg3, shuf_mask);
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 16 - 19
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 20 - 23
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 24 - 27
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 28 - 31
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 32 - 35
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 36 - 39
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 40 - 43
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 44 - 47
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 48 - 51
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 52 - 55
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 56 - 59
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 60 - 63
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 3);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 64 - 67
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 3);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 68 - 71
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 3);
  pxor(msg3, msg1);

  // Rounds 72 - 75
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 3);

  // Rounds 76 - 79
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 3);

  // add current hash values with previously saved
  movdqu(msg0, saved_e0);
  sha1nexte(e0, msg0);
  movdqu(msg0, saved_abcd);
  paddd(abcd, msg0);

  if (multi_block) {
    // increment data pointer and loop if more to process
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, loop0);
    movptr(rax, ofs); //return ofs
  }
  // write hash values back in the correct order
  pshufd(abcd, abcd, 0x1b);
  movdqu(Address(state, 0), abcd);
  pextrd(Address(state, 16), e0, 3);

}
|
||||
|
||||
// xmm0 (msg) is used as an implicit argument to sha256rnds2
|
||||
// and state0 and state1 can never use xmm0 register.
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
#ifdef _LP64
|
||||
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||
bool multi_block, XMMRegister shuf_mask) {
|
||||
#else
|
||||
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
|
||||
XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
|
||||
Register buf, Register state, Register ofs, Register limit, Register rsp,
|
||||
bool multi_block) {
|
||||
#endif
|
||||
Label start, done_hash, loop0;
|
||||
|
||||
address K256 = StubRoutines::x86::k256_addr();
|
||||
address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
|
||||
|
||||
bind(start);
|
||||
movdqu(state0, Address(state, 0));
|
||||
movdqu(state1, Address(state, 16));
|
||||
|
||||
pshufd(state0, state0, 0xB1);
|
||||
pshufd(state1, state1, 0x1B);
|
||||
movdqa(msgtmp4, state0);
|
||||
palignr(state0, state1, 8);
|
||||
pblendw(state1, msgtmp4, 0xF0);
|
||||
|
||||
#ifdef _LP64
|
||||
movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
|
||||
#endif
|
||||
lea(rax, ExternalAddress(K256));
|
||||
|
||||
bind(loop0);
|
||||
movdqu(Address(rsp, 0), state0);
|
||||
movdqu(Address(rsp, 16), state1);
|
||||
|
||||
// Rounds 0-3
|
||||
movdqu(msg, Address(buf, 0));
|
||||
#ifdef _LP64
|
||||
pshufb(msg, shuf_mask);
|
||||
#else
|
||||
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||
#endif
|
||||
movdqa(msgtmp0, msg);
|
||||
paddd(msg, Address(rax, 0));
|
||||
sha256rnds2(state1, state0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
|
||||
// Rounds 4-7
|
||||
movdqu(msg, Address(buf, 16));
|
||||
#ifdef _LP64
|
||||
pshufb(msg, shuf_mask);
|
||||
#else
|
||||
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||
#endif
|
||||
movdqa(msgtmp1, msg);
|
||||
paddd(msg, Address(rax, 16));
|
||||
sha256rnds2(state1, state0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp0, msgtmp1);
|
||||
|
||||
// Rounds 8-11
|
||||
movdqu(msg, Address(buf, 32));
|
||||
#ifdef _LP64
|
||||
pshufb(msg, shuf_mask);
|
||||
#else
|
||||
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||
#endif
|
||||
movdqa(msgtmp2, msg);
|
||||
paddd(msg, Address(rax, 32));
|
||||
sha256rnds2(state1, state0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp1, msgtmp2);
|
||||
|
||||
// Rounds 12-15
|
||||
movdqu(msg, Address(buf, 48));
|
||||
#ifdef _LP64
|
||||
pshufb(msg, shuf_mask);
|
||||
#else
|
||||
pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
|
||||
#endif
|
||||
movdqa(msgtmp3, msg);
|
||||
paddd(msg, Address(rax, 48));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp3);
|
||||
palignr(msgtmp4, msgtmp2, 4);
|
||||
paddd(msgtmp0, msgtmp4);
|
||||
sha256msg2(msgtmp0, msgtmp3);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp2, msgtmp3);
|
||||
|
||||
// Rounds 16-19
|
||||
movdqa(msg, msgtmp0);
|
||||
paddd(msg, Address(rax, 64));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp0);
|
||||
palignr(msgtmp4, msgtmp3, 4);
|
||||
paddd(msgtmp1, msgtmp4);
|
||||
sha256msg2(msgtmp1, msgtmp0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp3, msgtmp0);
|
||||
|
||||
// Rounds 20-23
|
||||
movdqa(msg, msgtmp1);
|
||||
paddd(msg, Address(rax, 80));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp1);
|
||||
palignr(msgtmp4, msgtmp0, 4);
|
||||
paddd(msgtmp2, msgtmp4);
|
||||
sha256msg2(msgtmp2, msgtmp1);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp0, msgtmp1);
|
||||
|
||||
// Rounds 24-27
|
||||
movdqa(msg, msgtmp2);
|
||||
paddd(msg, Address(rax, 96));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp2);
|
||||
palignr(msgtmp4, msgtmp1, 4);
|
||||
paddd(msgtmp3, msgtmp4);
|
||||
sha256msg2(msgtmp3, msgtmp2);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp1, msgtmp2);
|
||||
|
||||
// Rounds 28-31
|
||||
movdqa(msg, msgtmp3);
|
||||
paddd(msg, Address(rax, 112));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp3);
|
||||
palignr(msgtmp4, msgtmp2, 4);
|
||||
paddd(msgtmp0, msgtmp4);
|
||||
sha256msg2(msgtmp0, msgtmp3);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp2, msgtmp3);
|
||||
|
||||
// Rounds 32-35
|
||||
movdqa(msg, msgtmp0);
|
||||
paddd(msg, Address(rax, 128));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp0);
|
||||
palignr(msgtmp4, msgtmp3, 4);
|
||||
paddd(msgtmp1, msgtmp4);
|
||||
sha256msg2(msgtmp1, msgtmp0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp3, msgtmp0);
|
||||
|
||||
// Rounds 36-39
|
||||
movdqa(msg, msgtmp1);
|
||||
paddd(msg, Address(rax, 144));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp1);
|
||||
palignr(msgtmp4, msgtmp0, 4);
|
||||
paddd(msgtmp2, msgtmp4);
|
||||
sha256msg2(msgtmp2, msgtmp1);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp0, msgtmp1);
|
||||
|
||||
// Rounds 40-43
|
||||
movdqa(msg, msgtmp2);
|
||||
paddd(msg, Address(rax, 160));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp2);
|
||||
palignr(msgtmp4, msgtmp1, 4);
|
||||
paddd(msgtmp3, msgtmp4);
|
||||
sha256msg2(msgtmp3, msgtmp2);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp1, msgtmp2);
|
||||
|
||||
// Rounds 44-47
|
||||
movdqa(msg, msgtmp3);
|
||||
paddd(msg, Address(rax, 176));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp3);
|
||||
palignr(msgtmp4, msgtmp2, 4);
|
||||
paddd(msgtmp0, msgtmp4);
|
||||
sha256msg2(msgtmp0, msgtmp3);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp2, msgtmp3);
|
||||
|
||||
// Rounds 48-51
|
||||
movdqa(msg, msgtmp0);
|
||||
paddd(msg, Address(rax, 192));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp0);
|
||||
palignr(msgtmp4, msgtmp3, 4);
|
||||
paddd(msgtmp1, msgtmp4);
|
||||
sha256msg2(msgtmp1, msgtmp0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
sha256msg1(msgtmp3, msgtmp0);
|
||||
|
||||
// Rounds 52-55
|
||||
movdqa(msg, msgtmp1);
|
||||
paddd(msg, Address(rax, 208));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp1);
|
||||
palignr(msgtmp4, msgtmp0, 4);
|
||||
paddd(msgtmp2, msgtmp4);
|
||||
sha256msg2(msgtmp2, msgtmp1);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
|
||||
// Rounds 56-59
|
||||
movdqa(msg, msgtmp2);
|
||||
paddd(msg, Address(rax, 224));
|
||||
sha256rnds2(state1, state0);
|
||||
movdqa(msgtmp4, msgtmp2);
|
||||
palignr(msgtmp4, msgtmp1, 4);
|
||||
paddd(msgtmp3, msgtmp4);
|
||||
sha256msg2(msgtmp3, msgtmp2);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
|
||||
// Rounds 60-63
|
||||
movdqa(msg, msgtmp3);
|
||||
paddd(msg, Address(rax, 240));
|
||||
sha256rnds2(state1, state0);
|
||||
pshufd(msg, msg, 0x0E);
|
||||
sha256rnds2(state0, state1);
|
||||
movdqu(msg, Address(rsp, 0));
|
||||
paddd(state0, msg);
|
||||
movdqu(msg, Address(rsp, 16));
|
||||
paddd(state1, msg);
|
||||
|
||||
if (multi_block) {
|
||||
// increment data pointer and loop if more to process
|
||||
addptr(buf, 64);
|
||||
addptr(ofs, 64);
|
||||
cmpptr(ofs, limit);
|
||||
jcc(Assembler::belowEqual, loop0);
|
||||
movptr(rax, ofs); //return ofs
|
||||
}
|
||||
|
||||
pshufd(state0, state0, 0x1B);
|
||||
pshufd(state1, state1, 0xB1);
|
||||
movdqa(msgtmp4, state0);
|
||||
pblendw(state0, state1, 0xF0);
|
||||
palignr(state1, msgtmp4, 8);
|
||||
|
||||
movdqu(Address(state, 0), state0);
|
||||
movdqu(Address(state, 16), state1);
|
||||
|
||||
bind(done_hash);
|
||||
|
||||
}
|
||||
@ -208,13 +208,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
__ subptr(rsp, ymm_bytes);
|
||||
// Save upper half of YMM registers
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
|
||||
__ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
|
||||
}
|
||||
if (UseAVX > 2) {
|
||||
__ subptr(rsp, zmm_bytes);
|
||||
// Save upper half of ZMM registers
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
|
||||
__ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -304,13 +304,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
if (UseAVX > 2) {
|
||||
// Restore upper half of ZMM registers.
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
|
||||
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
|
||||
}
|
||||
__ addptr(rsp, zmm_bytes);
|
||||
}
|
||||
// Restore upper half of YMM registers.
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
|
||||
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
|
||||
}
|
||||
__ addptr(rsp, ymm_bytes);
|
||||
}
|
||||
|
||||
@ -179,13 +179,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
// Save upper half of YMM registers(0..15)
|
||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
|
||||
__ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
|
||||
}
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Save upper half of ZMM registers(0..15)
|
||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
|
||||
__ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
|
||||
}
|
||||
// Save full ZMM registers(16..num_xmm_regs)
|
||||
base_addr = XSAVE_AREA_UPPERBANK;
|
||||
@ -333,13 +333,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
// Restore upper half of YMM registers (0..15)
|
||||
int base_addr = XSAVE_AREA_YMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vinsertf128h(as_XMMRegister(n), Address(rsp, base_addr+n*16));
|
||||
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
|
||||
}
|
||||
if (VM_Version::supports_evex()) {
|
||||
// Restore upper half of ZMM registers (0..15)
|
||||
base_addr = XSAVE_AREA_ZMM_BEGIN;
|
||||
for (int n = 0; n < 16; n++) {
|
||||
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
|
||||
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
|
||||
}
|
||||
// Restore full ZMM registers(16..num_xmm_regs)
|
||||
base_addr = XSAVE_AREA_UPPERBANK;
|
||||
|
||||
@ -3068,6 +3068,136 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte-aligned, 16-byte constant made of four 32-bit words: three
// zero words followed by 0xFFFFFFFF.  Returns the address of the first word.
address generate_upper_word_mask() {
  static const juint mask_words[] = {
    0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
  };
  const int word_count = (int)(sizeof(mask_words) / sizeof(mask_words[0]));

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
  address table = __ pc();
  for (int i = 0; i < word_count; i++) {
    __ emit_data((jint)mask_words[i], relocInfo::none, 0);
  }
  return table;
}
|
||||
|
||||
// Emits the 64-byte-aligned, 16-byte shuffle mask used by fast_sha1 as four
// 32-bit words, in the order listed.  Returns the address of the first word.
address generate_shuffle_byte_flip_mask() {
  static const juint mask_words[] = {
    0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203
  };
  const int word_count = (int)(sizeof(mask_words) / sizeof(mask_words[0]));

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
  address table = __ pc();
  for (int i = 0; i < word_count; i++) {
    __ emit_data((jint)mask_words[i], relocInfo::none, 0);
  }
  return table;
}
|
||||
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
// Generates the 32-bit SHA-1 compress stub.
//
//   multi_block  when true, ofs and limit are also loaded from the stack and
//                handed to fast_sha1 for its block loop.
//   name         stub name recorded by the StubCodeMark.
//
// Returns the entry address of the generated code.
//
// Arguments are read from the caller's frame relative to rbp (offsets 8, 12,
// 16 and 20 after enter()).
//
// Stack layout fix: fast_sha1 stores 16 bytes at Address(rsp, 0) and at
// Address(rsp, 16).  The previous version pushed 'limit' *after* reserving the
// scratch area, which left the saved value at [rsp] where the first of those
// stores overwrote it, so the following pop restored a corrupted register.
// The push now happens before the reservation (and the pop after its release),
// so the saved word sits above the 32-byte scratch area.
address generate_sha1_implCompress(bool multi_block, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  Register buf   = rax;
  Register state = rdx;
  Register ofs   = rcx;
  Register limit = rdi;

  const Address  buf_param(rbp, 8 + 0);
  const Address  state_param(rbp, 8 + 4);
  const Address  ofs_param(rbp, 8 + 8);
  const Address  limit_param(rbp, 8 + 12);

  const XMMRegister abcd = xmm0;
  const XMMRegister e0 = xmm1;
  const XMMRegister e1 = xmm2;
  const XMMRegister msg0 = xmm3;

  const XMMRegister msg1 = xmm4;
  const XMMRegister msg2 = xmm5;
  const XMMRegister msg3 = xmm6;
  const XMMRegister shuf_mask = xmm7;

  __ enter();
  if (multi_block) {
    // Preserve the caller's value of the register used for 'limit'.  Must be
    // pushed before the scratch area is carved out (see header comment).
    __ push(limit);
  }
  // Scratch area handed to fast_sha1 through rsp.
  __ subptr(rsp, 8 * wordSize);

  __ movptr(buf, buf_param);
  __ movptr(state, state_param);
  if (multi_block) {
    __ movptr(ofs, ofs_param);
    __ movptr(limit, limit_param);
  }

  __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
               buf, state, ofs, limit, rsp, multi_block);

  __ addptr(rsp, 8 * wordSize);
  if (multi_block) {
    __ pop(limit);
  }
  __ leave();
  __ ret(0);
  return start;
}
|
||||
|
||||
// Emits the 64-byte-aligned, 16-byte shuffle mask used by fast_sha256 as four
// 32-bit words, in the order listed.  Returns the address of the first word.
address generate_pshuffle_byte_flip_mask() {
  static const juint mask_words[] = {
    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
  };
  const int word_count = (int)(sizeof(mask_words) / sizeof(mask_words[0]));

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
  address table = __ pc();
  for (int i = 0; i < word_count; i++) {
    __ emit_data((jint)mask_words[i], relocInfo::none, 0);
  }
  return table;
}
|
||||
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
// Generates the 32-bit SHA-256 compress stub.
//
//   multi_block  when true, ofs and limit are also loaded from the stack and
//                handed to fast_sha256 for its block loop.
//   name         stub name recorded by the StubCodeMark.
//
// Returns the entry address of the generated code.
address generate_sha256_implCompress(bool multi_block, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address entry = __ pc();

  // General-purpose registers that receive the arguments.
  Register buf   = rbx;
  Register state = rsi;
  Register ofs   = rdx;
  Register limit = rcx;

  // Stack locations of the incoming arguments, relative to rbp after enter().
  const Address buf_arg(rbp, 8 + 0);
  const Address state_arg(rbp, 8 + 4);
  const Address ofs_arg(rbp, 8 + 8);
  const Address limit_arg(rbp, 8 + 12);

  // XMM register assignment for fast_sha256.
  const XMMRegister msg     = xmm0;
  const XMMRegister state0  = xmm1;
  const XMMRegister state1  = xmm2;
  const XMMRegister msgtmp0 = xmm3;
  const XMMRegister msgtmp1 = xmm4;
  const XMMRegister msgtmp2 = xmm5;
  const XMMRegister msgtmp3 = xmm6;
  const XMMRegister msgtmp4 = xmm7;

  // Bytes reserved below the frame pointer for this stub.
  const int frame_bytes = 8 * wordSize;

  __ enter();
  __ subptr(rsp, frame_bytes);
  // NOTE(review): handleSOERegisters is defined elsewhere in this file;
  // confirm its save slots do not overlap the 32 bytes fast_sha256 writes
  // at rsp+0 and rsp+16.
  handleSOERegisters(true /*saving*/);

  __ movptr(buf, buf_arg);
  __ movptr(state, state_arg);
  if (multi_block) {
    __ movptr(ofs, ofs_arg);
    __ movptr(limit, limit_arg);
  }

  __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
                 buf, state, ofs, limit, rsp, multi_block);

  handleSOERegisters(false);
  __ addptr(rsp, frame_bytes);
  __ leave();
  __ ret(0);
  return entry;
}
|
||||
|
||||
// byte swap x86 long
|
||||
address generate_ghash_long_swap_mask() {
|
||||
@ -3772,6 +3902,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||
}
|
||||
|
||||
if (UseSHA1Intrinsics) {
|
||||
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
|
||||
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
|
||||
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
|
||||
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
|
||||
}
|
||||
if (UseSHA256Intrinsics) {
|
||||
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
|
||||
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
|
||||
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||
}
|
||||
|
||||
// Generate GHASH intrinsics code
|
||||
if (UseGHASHIntrinsics) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
|
||||
@ -275,7 +275,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
__ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
|
||||
__ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
|
||||
}
|
||||
} else {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
@ -393,7 +393,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// emit the restores for xmm regs
|
||||
if (VM_Version::supports_evex()) {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
__ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
|
||||
__ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
|
||||
}
|
||||
} else {
|
||||
for (int i = xmm_save_first; i <= last_reg; i++) {
|
||||
@ -3695,6 +3695,133 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte-aligned, 16-byte constant as two 64-bit words: a zero
// quadword followed by 0xFFFFFFFF00000000.  Returns the address of the first
// quadword.
address generate_upper_word_mask() {
  const jlong first_qword  = 0x0000000000000000;
  const jlong second_qword = 0xFFFFFFFF00000000;

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
  address table = __ pc();
  __ emit_data64(first_qword, relocInfo::none);
  __ emit_data64(second_qword, relocInfo::none);
  return table;
}
|
||||
|
||||
// Emits the 64-byte-aligned, 16-byte shuffle mask used by fast_sha1 as two
// 64-bit words, in the order listed.  Returns the address of the first
// quadword.
address generate_shuffle_byte_flip_mask() {
  const jlong first_qword  = 0x08090a0b0c0d0e0f;
  const jlong second_qword = 0x0001020304050607;

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
  address table = __ pc();
  __ emit_data64(first_qword, relocInfo::none);
  __ emit_data64(second_qword, relocInfo::none);
  return table;
}
|
||||
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
// Generates the 64-bit SHA-1 compress stub.
//
//   multi_block  forwarded to fast_sha1; selects the multi-block loop.
//   name         stub name recorded by the StubCodeMark.
//
// Returns the entry address of the generated code.  Arguments arrive in
// c_rarg0..c_rarg3 and are passed straight through to fast_sha1.
address generate_sha1_implCompress(bool multi_block, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address entry = __ pc();

  // Incoming arguments.
  Register buf   = c_rarg0;
  Register state = c_rarg1;
  Register ofs   = c_rarg2;
  Register limit = c_rarg3;

  // XMM register assignment for fast_sha1.
  const XMMRegister abcd      = xmm0;
  const XMMRegister e0        = xmm1;
  const XMMRegister e1        = xmm2;
  const XMMRegister msg0      = xmm3;
  const XMMRegister msg1      = xmm4;
  const XMMRegister msg2      = xmm5;
  const XMMRegister msg3      = xmm6;
  const XMMRegister shuf_mask = xmm7;

  // 32 bytes of scratch handed to fast_sha1 through rsp.
  const int scratch_bytes = 4 * wordSize;

  __ enter();

#ifdef _WIN64
  // xmm6 and xmm7 are used below and must be preserved for the caller:
  // spill both (16 bytes each) before doing anything else.
  const int xmm_spill_bytes = 4 * wordSize;
  __ subptr(rsp, xmm_spill_bytes);
  __ movdqu(Address(rsp, 0), xmm6);
  __ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif

  __ subptr(rsp, scratch_bytes);

  __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
               buf, state, ofs, limit, rsp, multi_block);

  __ addptr(rsp, scratch_bytes);

#ifdef _WIN64
  // Reload the caller's xmm6/xmm7 and release the spill area.
  __ movdqu(xmm6, Address(rsp, 0));
  __ movdqu(xmm7, Address(rsp, 2 * wordSize));
  __ addptr(rsp, xmm_spill_bytes);
#endif

  __ leave();
  __ ret(0);
  return entry;
}
|
||||
|
||||
// Emits the 64-byte-aligned, 16-byte shuffle mask used by fast_sha256 as two
// 64-bit words, in the order listed.  Returns the address of the first
// quadword.
address generate_pshuffle_byte_flip_mask() {
  const jlong first_qword  = 0x0405060700010203;
  const jlong second_qword = 0x0c0d0e0f08090a0b;

  __ align(64);
  StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
  address table = __ pc();
  __ emit_data64(first_qword, relocInfo::none);
  __ emit_data64(second_qword, relocInfo::none);
  return table;
}
|
||||
|
||||
// ofs and limit are used for multi-block byte array.
|
||||
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
|
||||
// Generates the 64-bit SHA-256 compress stub.
//
//   multi_block  forwarded to fast_sha256; selects the multi-block loop.
//   name         stub name recorded by the StubCodeMark.
//
// Returns the entry address of the generated code.  Arguments arrive in
// c_rarg0..c_rarg3 and are passed straight through to fast_sha256.
address generate_sha256_implCompress(bool multi_block, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address entry = __ pc();

  // Incoming arguments.
  Register buf   = c_rarg0;
  Register state = c_rarg1;
  Register ofs   = c_rarg2;
  Register limit = c_rarg3;

  // XMM register assignment for fast_sha256.
  const XMMRegister msg       = xmm0;
  const XMMRegister state0    = xmm1;
  const XMMRegister state1    = xmm2;
  const XMMRegister msgtmp0   = xmm3;
  const XMMRegister msgtmp1   = xmm4;
  const XMMRegister msgtmp2   = xmm5;
  const XMMRegister msgtmp3   = xmm6;
  const XMMRegister msgtmp4   = xmm7;
  const XMMRegister shuf_mask = xmm8;

  // 32 bytes of scratch handed to fast_sha256 through rsp.
  const int scratch_bytes = 4 * wordSize;

  __ enter();

#ifdef _WIN64
  // xmm6, xmm7 and xmm8 are used below and must be preserved for the caller:
  // spill all three (16 bytes each) before doing anything else.
  const int xmm_spill_bytes = 6 * wordSize;
  __ subptr(rsp, xmm_spill_bytes);
  __ movdqu(Address(rsp, 0), xmm6);
  __ movdqu(Address(rsp, 2 * wordSize), xmm7);
  __ movdqu(Address(rsp, 4 * wordSize), xmm8);
#endif

  __ subptr(rsp, scratch_bytes);

  __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
                 buf, state, ofs, limit, rsp, multi_block, shuf_mask);

  __ addptr(rsp, scratch_bytes);

#ifdef _WIN64
  // Reload the caller's xmm6..xmm8 and release the spill area.
  __ movdqu(xmm6, Address(rsp, 0));
  __ movdqu(xmm7, Address(rsp, 2 * wordSize));
  __ movdqu(xmm8, Address(rsp, 4 * wordSize));
  __ addptr(rsp, xmm_spill_bytes);
#endif

  __ leave();
  __ ret(0);
  return entry;
}
|
||||
|
||||
// This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
|
||||
// to hide instruction latency
|
||||
//
|
||||
@ -4974,6 +5101,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||
}
|
||||
|
||||
if (UseSHA1Intrinsics) {
|
||||
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
|
||||
StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
|
||||
StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
|
||||
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
|
||||
}
|
||||
if (UseSHA256Intrinsics) {
|
||||
StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
|
||||
StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
|
||||
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||
}
|
||||
|
||||
// Generate GHASH intrinsics code
|
||||
if (UseGHASHIntrinsics) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
|
||||
@ -29,6 +29,12 @@
|
||||
#include "runtime/thread.inline.hpp"
|
||||
#include "crc32c.h"
|
||||
|
||||
// ALIGNED_(x): compiler-specific alignment specifier.  Expands to
// __declspec(align(x)) when _MSC_VER is defined and to
// __attribute__ ((aligned(x))) otherwise.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
// Implementation of the platform-specific part of StubRoutines - for
|
||||
// a description of how to extend it, see the stubRoutines.hpp file.
|
||||
|
||||
@ -37,6 +43,10 @@ address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
||||
address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
||||
address StubRoutines::x86::_upper_word_mask_addr = NULL;
|
||||
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||
{
|
||||
@ -236,3 +246,23 @@ void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported)
|
||||
_crc32c_table = (juint*)pclmulqdq_table;
|
||||
}
|
||||
}
|
||||
|
||||
ALIGNED_(64) juint StubRoutines::x86::_k256[] =
|
||||
{
|
||||
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
|
||||
0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
|
||||
0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
|
||||
0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
|
||||
0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
|
||||
0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
|
||||
0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
|
||||
0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
|
||||
0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
|
||||
0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
|
||||
0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
|
||||
0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
|
||||
0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
|
||||
0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
|
||||
0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
|
||||
0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
|
||||
};
|
||||
|
||||
@ -46,6 +46,17 @@
|
||||
static address _ghash_long_swap_mask_addr;
|
||||
static address _ghash_byte_swap_mask_addr;
|
||||
|
||||
// upper word mask for sha1
|
||||
static address _upper_word_mask_addr;
|
||||
// byte flip mask for sha1
|
||||
static address _shuffle_byte_flip_mask_addr;
|
||||
|
||||
//k256 table for sha256
|
||||
static juint _k256[];
|
||||
static address _k256_adr;
|
||||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
|
||||
public:
|
||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||
@ -53,5 +64,9 @@
|
||||
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
||||
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
||||
static address upper_word_mask_addr() { return _upper_word_mask_addr; }
|
||||
static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
|
||||
static address k256_addr() { return _k256_adr; }
|
||||
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
|
||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||
|
||||
@ -68,10 +68,11 @@
|
||||
declare_constant(VM_Version::CPU_AVX512DQ) \
|
||||
declare_constant(VM_Version::CPU_AVX512PF) \
|
||||
declare_constant(VM_Version::CPU_AVX512ER) \
|
||||
declare_constant(VM_Version::CPU_AVX512CD) \
|
||||
declare_constant(VM_Version::CPU_AVX512BW)
|
||||
declare_constant(VM_Version::CPU_AVX512CD)
|
||||
|
||||
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
|
||||
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
|
||||
declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
|
||||
declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
|
||||
declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
|
||||
|
||||
#endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -385,7 +385,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ movdl(xmm0, rcx);
|
||||
__ pshufd(xmm0, xmm0, 0x00);
|
||||
__ vinsertf128h(xmm0, xmm0, xmm0);
|
||||
__ vinsertf128_high(xmm0, xmm0);
|
||||
__ vmovdqu(xmm7, xmm0);
|
||||
#ifdef _LP64
|
||||
__ vmovdqu(xmm8, xmm0);
|
||||
@ -577,7 +577,7 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
|
||||
char buf[256];
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
cores_per_cpu(), threads_per_core(),
|
||||
cpu_family(), _model, _stepping,
|
||||
(supports_cmov() ? ", cmov" : ""),
|
||||
@ -608,7 +608,8 @@ void VM_Version::get_processor_features() {
|
||||
(supports_bmi1() ? ", bmi1" : ""),
|
||||
(supports_bmi2() ? ", bmi2" : ""),
|
||||
(supports_adx() ? ", adx" : ""),
|
||||
(supports_evex() ? ", evex" : ""));
|
||||
(supports_evex() ? ", evex" : ""),
|
||||
(supports_sha() ? ", sha" : ""));
|
||||
_features_string = os::strdup(buf);
|
||||
|
||||
// UseSSE is set to the smaller of what hardware supports and what
|
||||
@ -730,17 +731,29 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA) {
|
||||
if (supports_sha()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA)) {
|
||||
UseSHA = true;
|
||||
}
|
||||
} else if (UseSHA) {
|
||||
warning("SHA instructions are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
|
||||
if (UseSHA1Intrinsics) {
|
||||
if (UseSHA) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA1Intrinsics) {
|
||||
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA256Intrinsics) {
|
||||
if (UseSHA) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA256Intrinsics) {
|
||||
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
}
|
||||
@ -750,6 +763,10 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
|
||||
if (UseAdler32Intrinsics) {
|
||||
warning("Adler32Intrinsics not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||
|
||||
@ -221,7 +221,7 @@ class VM_Version : public Abstract_VM_Version {
|
||||
avx512pf : 1,
|
||||
avx512er : 1,
|
||||
avx512cd : 1,
|
||||
: 1,
|
||||
sha : 1,
|
||||
avx512bw : 1,
|
||||
avx512vl : 1;
|
||||
} bits;
|
||||
@ -282,11 +282,13 @@ protected:
|
||||
CPU_AVX512DQ = (1 << 27),
|
||||
CPU_AVX512PF = (1 << 28),
|
||||
CPU_AVX512ER = (1 << 29),
|
||||
CPU_AVX512CD = (1 << 30),
|
||||
CPU_AVX512BW = (1 << 31)
|
||||
CPU_AVX512CD = (1 << 30)
|
||||
// Keeping sign bit 31 unassigned.
|
||||
};
|
||||
|
||||
#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit
|
||||
#define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // enums are limited to 31 bit
|
||||
#define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // EVEX instructions with smaller vector length
|
||||
#define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // SHA instructions
|
||||
|
||||
enum Extended_Family {
|
||||
// AMD
|
||||
@ -516,6 +518,8 @@ protected:
|
||||
result |= CPU_ADX;
|
||||
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
|
||||
result |= CPU_BMI2;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
|
||||
result |= CPU_SHA;
|
||||
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
|
||||
result |= CPU_LZCNT;
|
||||
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
|
||||
@ -721,6 +725,7 @@ public:
|
||||
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
|
||||
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
|
||||
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
|
||||
static bool supports_sha() { return (_features & CPU_SHA) != 0; }
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
|
||||
|
||||
@ -3179,13 +3179,13 @@ instruct Repl32B(vecY dst, rRegI src) %{
|
||||
"punpcklbw $dst,$dst\n\t"
|
||||
"pshuflw $dst,$dst,0x00\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate32B" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate32B" %}
|
||||
ins_encode %{
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3196,12 +3196,12 @@ instruct Repl32B_mem(vecY dst, memory mem) %{
|
||||
format %{ "punpcklbw $dst,$mem\n\t"
|
||||
"pshuflw $dst,$dst,0x00\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate32B" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate32B" %}
|
||||
ins_encode %{
|
||||
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
|
||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3223,11 +3223,11 @@ instruct Repl32B_imm(vecY dst, immI con) %{
|
||||
match(Set dst (ReplicateB con));
|
||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
|
||||
"vinserti128_high $dst,$dst\t! lreplicate32B($con)" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3298,12 +3298,12 @@ instruct Repl16S(vecY dst, rRegI src) %{
|
||||
format %{ "movd $dst,$src\n\t"
|
||||
"pshuflw $dst,$dst,0x00\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate16S" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate16S" %}
|
||||
ins_encode %{
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3313,11 +3313,11 @@ instruct Repl16S_mem(vecY dst, memory mem) %{
|
||||
match(Set dst (ReplicateS (LoadS mem)));
|
||||
format %{ "pshuflw $dst,$mem,0x00\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate16S" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate16S" %}
|
||||
ins_encode %{
|
||||
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3327,11 +3327,11 @@ instruct Repl16S_imm(vecY dst, immI con) %{
|
||||
match(Set dst (ReplicateS con));
|
||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate16S($con)" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3363,11 +3363,11 @@ instruct Repl8I(vecY dst, rRegI src) %{
|
||||
match(Set dst (ReplicateI src));
|
||||
format %{ "movd $dst,$src\n\t"
|
||||
"pshufd $dst,$dst,0x00\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate8I" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate8I" %}
|
||||
ins_encode %{
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3376,10 +3376,10 @@ instruct Repl8I_mem(vecY dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateI (LoadI mem)));
|
||||
format %{ "pshufd $dst,$mem,0x00\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate8I" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate8I" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3401,11 +3401,11 @@ instruct Repl8I_imm(vecY dst, immI con) %{
|
||||
match(Set dst (ReplicateI con));
|
||||
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst" %}
|
||||
"vinserti128_high $dst,$dst" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3430,11 +3430,11 @@ instruct Repl4L(vecY dst, rRegL src) %{
|
||||
match(Set dst (ReplicateL src));
|
||||
format %{ "movdq $dst,$src\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||
ins_encode %{
|
||||
__ movdq($dst$$XMMRegister, $src$$Register);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3447,13 +3447,13 @@ instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
|
||||
"movdl $tmp,$src.hi\n\t"
|
||||
"punpckldq $dst,$tmp\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||
ins_encode %{
|
||||
__ movdl($dst$$XMMRegister, $src$$Register);
|
||||
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
||||
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3464,11 +3464,11 @@ instruct Repl4L_imm(vecY dst, immL con) %{
|
||||
match(Set dst (ReplicateL con));
|
||||
format %{ "movq $dst,[$constantaddress]\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate4L($con)" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $constantaddress($con));
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3478,11 +3478,11 @@ instruct Repl4L_mem(vecY dst, memory mem) %{
|
||||
match(Set dst (ReplicateL (LoadL mem)));
|
||||
format %{ "movq $dst,$mem\n\t"
|
||||
"punpcklqdq $dst,$dst\n\t"
|
||||
"vinserti128h $dst,$dst,$dst\t! replicate4L" %}
|
||||
"vinserti128_high $dst,$dst\t! replicate4L" %}
|
||||
ins_encode %{
|
||||
__ movq($dst$$XMMRegister, $mem$$Address);
|
||||
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3511,10 +3511,10 @@ instruct Repl8F(vecY dst, regF src) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateF src));
|
||||
format %{ "pshufd $dst,$src,0x00\n\t"
|
||||
"vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
|
||||
"vinsertf128_high $dst,$dst\t! replicate8F" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3523,10 +3523,10 @@ instruct Repl8F_mem(vecY dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateF (LoadF mem)));
|
||||
format %{ "pshufd $dst,$mem,0x00\n\t"
|
||||
"vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
|
||||
"vinsertf128_high $dst,$dst\t! replicate8F" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
|
||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3576,10 +3576,10 @@ instruct Repl4D(vecY dst, regD src) %{
|
||||
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateD src));
|
||||
format %{ "pshufd $dst,$src,0x44\n\t"
|
||||
"vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
|
||||
"vinsertf128_high $dst,$dst\t! replicate4D" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3588,10 +3588,10 @@ instruct Repl4D_mem(vecY dst, memory mem) %{
|
||||
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
|
||||
match(Set dst (ReplicateD (LoadD mem)));
|
||||
format %{ "pshufd $dst,$mem,0x44\n\t"
|
||||
"vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
|
||||
"vinsertf128_high $dst,$dst\t! replicate4D" %}
|
||||
ins_encode %{
|
||||
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
|
||||
__ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
|
||||
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -4791,7 +4791,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vphaddd $tmp,$src2,$src2\n\t"
|
||||
"vphaddd $tmp,$tmp,$tmp2\n\t"
|
||||
"vextracti128 $tmp2,$tmp\n\t"
|
||||
"vextracti128_high $tmp2,$tmp\n\t"
|
||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||
"movd $tmp2,$src1\n\t"
|
||||
"vpaddd $tmp2,$tmp2,$tmp\n\t"
|
||||
@ -4800,7 +4800,7 @@ instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
||||
int vector_len = 1;
|
||||
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||
__ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||
__ movdl($tmp2$$XMMRegister, $src1$$Register);
|
||||
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
@ -4813,7 +4813,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dst (AddReductionVI src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
||||
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||
"vpaddd $tmp,$tmp,$src2\n\t"
|
||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||
@ -4824,7 +4824,7 @@ instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp,
|
||||
"movd $dst,$tmp2\t! add reduction8I" %}
|
||||
ins_encode %{
|
||||
int vector_len = 0;
|
||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||
@ -4841,9 +4841,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dst (AddReductionVI src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
||||
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
|
||||
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
|
||||
"vpaddd $tmp3,$tmp3,$src2\n\t"
|
||||
"vextracti128 $tmp,$tmp3\n\t"
|
||||
"vextracti128_high $tmp,$tmp3\n\t"
|
||||
"vpaddd $tmp,$tmp,$tmp3\n\t"
|
||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||
"vpaddd $tmp,$tmp,$tmp2\n\t"
|
||||
@ -4853,9 +4853,9 @@ instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp,
|
||||
"vpaddd $tmp2,$tmp,$tmp2\n\t"
|
||||
"movd $dst,$tmp2\t! mul reduction16I" %}
|
||||
ins_encode %{
|
||||
__ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||
@ -4892,7 +4892,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dst (AddReductionVL src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
||||
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||
"vpaddq $tmp2,$tmp,$src2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||
@ -4900,7 +4900,7 @@ instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||
"movdq $dst,$tmp2\t! add reduction4L" %}
|
||||
ins_encode %{
|
||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
@ -4915,9 +4915,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dst (AddReductionVL src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
|
||||
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
|
||||
"vpaddq $tmp2,$tmp2,$src2\n\t"
|
||||
"vextracti128 $tmp,$tmp2\n\t"
|
||||
"vextracti128_high $tmp,$tmp2\n\t"
|
||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||
@ -4925,9 +4925,9 @@ instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
||||
"vpaddq $tmp2,$tmp2,$tmp\n\t"
|
||||
"movdq $dst,$tmp2\t! add reduction8L" %}
|
||||
ins_encode %{
|
||||
__ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
@ -5026,7 +5026,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$src2,0x03\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"vextractf128 $tmp2,$src2\n\t"
|
||||
"vextractf128_high $tmp2,$src2\n\t"
|
||||
"vaddss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
@ -5042,7 +5042,7 @@ instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5065,7 +5065,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$src2,0x03\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||
"vaddss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
@ -5073,7 +5073,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||
"vaddss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
@ -5081,7 +5081,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||
"vaddss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vaddss $dst,$dst,$tmp\n\t"
|
||||
@ -5097,7 +5097,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5105,7 +5105,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5113,7 +5113,7 @@ instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5162,7 +5162,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
||||
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
||||
"pshufd $tmp,$src2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4h $tmp2,$src2, 0x1\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
|
||||
@ -5170,7 +5170,7 @@ instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5185,15 +5185,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
||||
format %{ "vaddsd $dst,$dst,$src2\n\t"
|
||||
"pshufd $tmp,$src2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||
"vaddsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
|
||||
@ -5201,15 +5201,15 @@ instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5307,7 +5307,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
||||
predicate(UseAVX > 0);
|
||||
match(Set dst (MulReductionVI src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
||||
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||
"vpmulld $tmp,$tmp,$src2\n\t"
|
||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
||||
@ -5318,7 +5318,7 @@ instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF
|
||||
"movd $dst,$tmp2\t! mul reduction8I" %}
|
||||
ins_encode %{
|
||||
int vector_len = 0;
|
||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
|
||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
||||
@ -5335,9 +5335,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dst (MulReductionVI src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
|
||||
format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
|
||||
format %{ "vextracti64x4_high $tmp3,$src2\n\t"
|
||||
"vpmulld $tmp3,$tmp3,$src2\n\t"
|
||||
"vextracti128 $tmp,$tmp3\n\t"
|
||||
"vextracti128_high $tmp,$tmp3\n\t"
|
||||
"vpmulld $tmp,$tmp,$tmp3\n\t"
|
||||
"pshufd $tmp2,$tmp,0xE\n\t"
|
||||
"vpmulld $tmp,$tmp,$tmp2\n\t"
|
||||
@ -5347,9 +5347,9 @@ instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF
|
||||
"vpmulld $tmp2,$tmp,$tmp2\n\t"
|
||||
"movd $dst,$tmp2\t! mul reduction16I" %}
|
||||
ins_encode %{
|
||||
__ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
|
||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
|
||||
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
|
||||
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
|
||||
@ -5386,7 +5386,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
||||
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
||||
match(Set dst (MulReductionVL src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti128 $tmp,$src2\n\t"
|
||||
format %{ "vextracti128_high $tmp,$src2\n\t"
|
||||
"vpmullq $tmp2,$tmp,$src2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||
@ -5394,7 +5394,7 @@ instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF
|
||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||
"movdq $dst,$tmp2\t! mul reduction4L" %}
|
||||
ins_encode %{
|
||||
__ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
@ -5409,9 +5409,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
||||
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
|
||||
match(Set dst (MulReductionVL src1 src2));
|
||||
effect(TEMP tmp, TEMP tmp2);
|
||||
format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
|
||||
format %{ "vextracti64x4_high $tmp2,$src2\n\t"
|
||||
"vpmullq $tmp2,$tmp2,$src2\n\t"
|
||||
"vextracti128 $tmp,$tmp2\n\t"
|
||||
"vextracti128_high $tmp,$tmp2\n\t"
|
||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||
@ -5419,9 +5419,9 @@ instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF
|
||||
"vpmullq $tmp2,$tmp2,$tmp\n\t"
|
||||
"movdq $dst,$tmp2\t! mul reduction8L" %}
|
||||
ins_encode %{
|
||||
__ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
|
||||
__ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
|
||||
@ -5520,7 +5520,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$src2,0x03\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"vextractf128 $tmp2,$src2\n\t"
|
||||
"vextractf128_high $tmp2,$src2\n\t"
|
||||
"vmulss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
@ -5536,7 +5536,7 @@ instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5559,7 +5559,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$src2,0x03\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||
"vmulss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
@ -5567,7 +5567,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||
"vmulss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
@ -5575,7 +5575,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"pshufd $tmp,$tmp2,0x03\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||
"vmulss $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0x01\n\t"
|
||||
"vmulss $dst,$dst,$tmp\n\t"
|
||||
@ -5591,7 +5591,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5599,7 +5599,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5607,7 +5607,7 @@ instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
|
||||
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5656,7 +5656,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
||||
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
||||
"pshufd $tmp,$src2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf128 $tmp2,$src2\n\t"
|
||||
"vextractf128_high $tmp2,$src2\n\t"
|
||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
|
||||
@ -5664,7 +5664,7 @@ instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
@ -5679,15 +5679,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
||||
format %{ "vmulsd $dst,$dst,$src2\n\t"
|
||||
"pshufd $tmp,$src2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x1\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x1\n\t"
|
||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x2\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x2\n\t"
|
||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\n\t"
|
||||
"vextractf32x4 $tmp2,$src2, 0x3\n\t"
|
||||
"vextractf32x4 $tmp2,$src2,0x3\n\t"
|
||||
"vmulsd $dst,$dst,$tmp2\n\t"
|
||||
"pshufd $tmp,$tmp2,0xE\n\t"
|
||||
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
|
||||
@ -5695,15 +5695,15 @@ instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
__ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
|
||||
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
|
||||
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
|
||||
|
||||
@ -1420,9 +1420,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
||||
// The ecx parameter to rep stos for the ClearArray node is in dwords.
|
||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||
|
||||
// Threshold size for cleararray.
|
||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
||||
|
||||
// Needs 2 CMOV's for longs.
|
||||
const int Matcher::long_cmove_cost() { return 1; }
|
||||
|
||||
@ -11369,27 +11366,54 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
|
||||
// =======================================================================
|
||||
// fast clearing of an array
|
||||
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(!UseFastStosb);
|
||||
predicate(!((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||
format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||
"REP STOS\t# store EAX into [EDI++] while ECX--" %}
|
||||
|
||||
format %{ $$template
|
||||
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
$$emit$$"CMP InitArrayShortSize,ECX\n\t"
|
||||
$$emit$$"JG LARGE\n\t"
|
||||
$$emit$$"SHL ECX, 1\n\t"
|
||||
$$emit$$"DEC ECX\n\t"
|
||||
$$emit$$"JS DONE\t# Zero length\n\t"
|
||||
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
|
||||
$$emit$$"DEC ECX\n\t"
|
||||
$$emit$$"JGE LOOP\n\t"
|
||||
$$emit$$"JMP DONE\n\t"
|
||||
$$emit$$"# LARGE:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
} else {
|
||||
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(UseFastStosb);
|
||||
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||
format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||
"REP STOSB\t# store EAX into [EDI++] while ECX--" %}
|
||||
format %{ $$template
|
||||
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
} else {
|
||||
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
@ -1637,9 +1637,6 @@ const bool Matcher::isSimpleConstant64(jlong value) {
|
||||
// The ecx parameter to rep stosq for the ClearArray node is in words.
|
||||
const bool Matcher::init_array_count_is_in_bytes = false;
|
||||
|
||||
// Threshold size for cleararray.
|
||||
const int Matcher::init_array_short_size = 8 * BytesPerLong;
|
||||
|
||||
// No additional cost for CMOVL.
|
||||
const int Matcher::long_cmove_cost() { return 0; }
|
||||
|
||||
@ -10460,31 +10457,55 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
|
||||
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
||||
rFlagsReg cr)
|
||||
%{
|
||||
predicate(!UseFastStosb);
|
||||
predicate(!((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||
|
||||
format %{ "xorq rax, rax\t# ClearArray:\n\t"
|
||||
"rep stosq\t# Store rax to *rdi++ while rcx--" %}
|
||||
format %{ $$template
|
||||
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||
$$emit$$"cmp InitArrayShortSize,rcx\n\t"
|
||||
$$emit$$"jg LARGE\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"js DONE\t# Zero length\n\t"
|
||||
$$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"jge LOOP\n\t"
|
||||
$$emit$$"jmp DONE\n\t"
|
||||
$$emit$$"# LARGE:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
|
||||
} else {
|
||||
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
||||
rFlagsReg cr)
|
||||
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
|
||||
rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseFastStosb);
|
||||
predicate(((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
|
||||
format %{ "xorq rax, rax\t# ClearArray:\n\t"
|
||||
"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||
"rep stosb\t# Store rax to *rdi++ while rcx--" %}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
|
||||
|
||||
format %{ $$template
|
||||
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
|
||||
} else {
|
||||
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
|
||||
@ -773,7 +773,7 @@ InterpreterFrame *InterpreterFrame::build(int size, TRAPS) {
|
||||
}
|
||||
|
||||
BasicType CppInterpreter::result_type_of(Method* method) {
|
||||
BasicType t;
|
||||
BasicType t = T_ILLEGAL; // silence compiler warnings
|
||||
switch (method->result_index()) {
|
||||
case 0 : t = T_BOOLEAN; break;
|
||||
case 1 : t = T_CHAR; break;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2007, 2008, 2010 Red Hat, Inc.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -62,7 +62,7 @@ void InterpreterRuntime::SignatureHandlerGeneratorBase::pass_object() {
|
||||
}
|
||||
|
||||
void InterpreterRuntime::SignatureHandlerGeneratorBase::push(BasicType type) {
|
||||
ffi_type *ftype;
|
||||
ffi_type *ftype = NULL;
|
||||
switch (type) {
|
||||
case T_VOID:
|
||||
ftype = &ffi_type_void;
|
||||
|
||||
|
Before Width: | Height: | Size: 636 B After Width: | Height: | Size: 636 B |
|
Before Width: | Height: | Size: 775 B After Width: | Height: | Size: 775 B |
|
Before Width: | Height: | Size: 644 B After Width: | Height: | Size: 644 B |
|
Before Width: | Height: | Size: 797 B After Width: | Height: | Size: 797 B |
|
Before Width: | Height: | Size: 208 B After Width: | Height: | Size: 208 B |
|
Before Width: | Height: | Size: 249 B After Width: | Height: | Size: 249 B |
|
Before Width: | Height: | Size: 434 B After Width: | Height: | Size: 434 B |
|
Before Width: | Height: | Size: 661 B After Width: | Height: | Size: 661 B |
|
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
|
Before Width: | Height: | Size: 677 B After Width: | Height: | Size: 677 B |
|
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
|
Before Width: | Height: | Size: 661 B After Width: | Height: | Size: 661 B |
|
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
|
Before Width: | Height: | Size: 426 B After Width: | Height: | Size: 426 B |
|
Before Width: | Height: | Size: 778 B After Width: | Height: | Size: 778 B |
|
Before Width: | Height: | Size: 228 B After Width: | Height: | Size: 228 B |
|
Before Width: | Height: | Size: 462 B After Width: | Height: | Size: 462 B |
|
Before Width: | Height: | Size: 266 B After Width: | Height: | Size: 266 B |
|
Before Width: | Height: | Size: 255 B After Width: | Height: | Size: 255 B |
|
Before Width: | Height: | Size: 348 B After Width: | Height: | Size: 348 B |
|
Before Width: | Height: | Size: 303 B After Width: | Height: | Size: 303 B |
|
Before Width: | Height: | Size: 304 B After Width: | Height: | Size: 304 B |
|
Before Width: | Height: | Size: 484 B After Width: | Height: | Size: 484 B |
|
Before Width: | Height: | Size: 185 B After Width: | Height: | Size: 185 B |
|
Before Width: | Height: | Size: 184 B After Width: | Height: | Size: 184 B |
|
Before Width: | Height: | Size: 163 B After Width: | Height: | Size: 163 B |
|
Before Width: | Height: | Size: 179 B After Width: | Height: | Size: 179 B |
|
Before Width: | Height: | Size: 165 B After Width: | Height: | Size: 165 B |
|
Before Width: | Height: | Size: 178 B After Width: | Height: | Size: 178 B |
|
Before Width: | Height: | Size: 165 B After Width: | Height: | Size: 165 B |
|
Before Width: | Height: | Size: 178 B After Width: | Height: | Size: 178 B |
@ -22,6 +22,7 @@
|
||||
*/
|
||||
package jdk.vm.ci.amd64;
|
||||
|
||||
import static jdk.vm.ci.code.MemoryBarriers.LOAD_LOAD;
|
||||
import static jdk.vm.ci.code.MemoryBarriers.LOAD_STORE;
|
||||
import static jdk.vm.ci.code.MemoryBarriers.STORE_STORE;
|
||||
import static jdk.vm.ci.code.Register.SPECIAL;
|
||||
@ -202,7 +203,8 @@ public class AMD64 extends Architecture {
|
||||
AVX512ER,
|
||||
AVX512CD,
|
||||
AVX512BW,
|
||||
AVX512VL
|
||||
AVX512VL,
|
||||
SHA
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
@ -220,7 +222,7 @@ public class AMD64 extends Architecture {
|
||||
private final AMD64Kind largestKind;
|
||||
|
||||
public AMD64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
|
||||
super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_STORE | STORE_STORE, 1, 8);
|
||||
super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_LOAD | LOAD_STORE | STORE_STORE, 1, 8);
|
||||
this.features = features;
|
||||
this.flags = flags;
|
||||
assert features.contains(CPUFeature.SSE2) : "minimum config for x64";
|
||||
|
||||