8264543: Cross modify fence optimization for x86

Reviewed-by: dholmes, tschatzl, sviswanathan
This commit is contained in:
Xubo Zhang 2021-08-04 05:43:58 +00:00 committed by David Holmes
parent 9e769090a0
commit 04134fcdaa
9 changed files with 61 additions and 24 deletions

View File

@ -27,6 +27,7 @@
#include "runtime/vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/sizes.hpp"
class VM_Version_Ext : public VM_Version {

View File

@ -30,6 +30,7 @@
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
@ -65,6 +66,22 @@ extern "C" {
static get_cpu_info_stub_t get_cpu_info_stub = NULL;
static detect_virt_stub_t detect_virt_stub = NULL;
#ifdef _LP64
bool VM_Version::supports_clflush() {
  // x86_64 relies on clflush to flush the code cache, so the instruction is
  // treated as always present; without it we would be in real trouble.
  //
  // The feature bit cannot be checked unconditionally: Assembler::clflush is
  // reached while the code cache flush routine is being generated, which
  // happens under Universe::init, before the processor features are set up.
  // Assembler::flush calls this function to check that clflush is allowed,
  // so callers get a free pass while Universe init is still in progress and
  // the bit is only verified afterwards.
  assert(!(Universe::is_fully_initialized() && (_features & CPU_FLUSH) == 0), "clflush should be available");
  return true;
}
#endif
class VM_Version_StubGenerator: public StubCodeGenerator {
public:

View File

@ -25,9 +25,9 @@
#ifndef CPU_X86_VM_VERSION_X86_HPP
#define CPU_X86_VM_VERSION_X86_HPP
#include "memory/universe.hpp"
#include "runtime/abstract_vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/sizes.hpp"
class VM_Version : public Abstract_VM_Version {
friend class VMStructs;
@ -261,7 +261,9 @@ class VM_Version : public Abstract_VM_Version {
uint32_t : 2,
avx512_4vnniw : 1,
avx512_4fmaps : 1,
: 28;
: 10,
serialize : 1,
: 17;
} bits;
};
@ -359,7 +361,8 @@ protected:
\
decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
decl(HV, "hv", 46) /* Hypervisor instructions */
decl(HV, "hv", 46) /* Hypervisor instructions */ \
decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@ -646,6 +649,8 @@ enum Extended_Family {
if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
result |= CPU_CLWB;
}
if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
result |= CPU_SERIALIZE;
}
// ZX features.
@ -896,6 +901,7 @@ public:
// Feature predicates: each one reports whether the matching CPU_* bit is set
// in _features.
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
static bool supports_hv() { return (_features & CPU_HV) != 0; }
// CPU_SERIALIZE is set during feature detection when
// _cpuid_info.sef_cpuid7_edx.bits.serialize is non-zero.
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
@ -1027,19 +1033,8 @@ public:
// and trailing StoreStore fences.
#ifdef _LP64
// On x86_64 clflush is expected to always be available, since it is relied
// upon to flush the code cache. The function is only declared here and is
// defined out of line: its body asserts on Universe::is_fully_initialized(),
// and keeping that body in this header leads to a header file conflict.
static bool supports_clflush(); // Can't inline due to header file conflict
#else
static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); }
#endif // _LP64

View File

@ -60,8 +60,12 @@ inline void OrderAccess::fence() {
}
// Executes a serializing instruction on the current CPU. Uses SERIALIZE when
// VM_Version reports support for it and falls back to CPUID otherwise.
inline void OrderAccess::cross_modify_fence_impl() {
  if (VM_Version::supports_serialize()) {
    // SERIALIZE is emitted as raw opcode bytes (0F 01 E8). The "memory"
    // clobber makes this path a compiler barrier as well, matching the
    // CPUID fallback below.
    __asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : : "memory"); // serialize
  } else {
    // CPUID writes EAX..EDX: EAX is tied to idx, the rest are clobbers.
    int idx = 0;
    __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
  }
}
#endif // OS_CPU_BSD_X86_ORDERACCESS_BSD_X86_HPP

View File

@ -56,14 +56,18 @@ inline void OrderAccess::fence() {
}
// Executes a serializing instruction on the current CPU. Uses SERIALIZE when
// VM_Version reports support for it and falls back to CPUID otherwise.
inline void OrderAccess::cross_modify_fence_impl() {
  if (VM_Version::supports_serialize()) {
    // SERIALIZE is emitted as raw opcode bytes (0F 01 E8). The "memory"
    // clobber makes this path a compiler barrier as well, matching the
    // CPUID fallback below.
    __asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : : "memory"); // serialize
  } else {
    int idx = 0;
#ifdef AMD64
    __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
#else
    // On some x86 systems EBX is a reserved register that cannot be
    // clobbered, so we must protect it around the CPUID.
    __asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory");
#endif
  }
}
#endif // OS_CPU_LINUX_X86_ORDERACCESS_LINUX_X86_HPP

View File

@ -58,9 +58,22 @@ inline void OrderAccess::fence() {
compiler_barrier();
}
inline void OrderAccess::cross_modify_fence_impl() {
inline void OrderAccess::cross_modify_fence_impl()
#if _MSC_VER >= 1928
{
//_serialize() intrinsic is supported starting from VS2019-16.7.2
if (VM_Version::supports_serialize()) {
_serialize();
} else {
int regs[4];
__cpuid(regs, 0);
}
}
#else
{
int regs[4];
__cpuid(regs, 0);
}
#endif
#endif // OS_CPU_WINDOWS_X86_ORDERACCESS_WINDOWS_X86_HPP

View File

@ -26,6 +26,7 @@
#define SHARE_RUNTIME_ORDERACCESS_HPP
#include "memory/allocation.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/macros.hpp"
// Memory Access Ordering Model

View File

@ -25,6 +25,7 @@
#ifndef SHARE_RUNTIME_VM_VERSION_HPP
#define SHARE_RUNTIME_VM_VERSION_HPP
#include "runtime/globals.hpp"
#include "utilities/macros.hpp" // for CPU_HEADER() macro.
#include CPU_HEADER(vm_version)

View File

@ -220,6 +220,7 @@ public class AMD64 extends Architecture {
AVX512_VBMI2,
AVX512_VBMI,
HV,
SERIALIZE,
}
private final EnumSet<CPUFeature> features;