mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-04 13:10:15 +00:00
8264543: Cross modify fence optimization for x86
Reviewed-by: dholmes, tschatzl, sviswanathan
This commit is contained in:
parent
9e769090a0
commit
04134fcdaa
@ -27,6 +27,7 @@
|
||||
|
||||
#include "runtime/vm_version.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
class VM_Version_Ext : public VM_Version {
|
||||
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "logging/log.hpp"
|
||||
#include "logging/logStream.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "memory/universe.hpp"
|
||||
#include "runtime/globals_extension.hpp"
|
||||
#include "runtime/java.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
@ -65,6 +66,22 @@ extern "C" {
|
||||
static get_cpu_info_stub_t get_cpu_info_stub = NULL;
|
||||
static detect_virt_stub_t detect_virt_stub = NULL;
|
||||
|
||||
#ifdef _LP64
|
||||
|
||||
// Built only under _LP64. Always returns true; the assert below additionally
// checks that CPU_FLUSH is set in _features once Universe::is_fully_initialized()
// reports true.
bool VM_Version::supports_clflush() {
|
||||
// clflush should always be available on x86_64;
|
||||
// if it is not, we are in real trouble because we rely on it
|
||||
// to flush the code cache.
|
||||
// Unfortunately, Assembler::clflush is currently called as part
|
||||
// of generating the code cache flush routine. This happens
|
||||
// under Universe::init, before the processor features are set
|
||||
// up. Assembler::flush calls this routine to check that clflush
|
||||
// is allowed. So, we give the caller a free pass if Universe init
|
||||
// is still in progress.
|
||||
assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
public:
|
||||
|
||||
@ -25,9 +25,9 @@
|
||||
#ifndef CPU_X86_VM_VERSION_X86_HPP
|
||||
#define CPU_X86_VM_VERSION_X86_HPP
|
||||
|
||||
#include "memory/universe.hpp"
|
||||
#include "runtime/abstract_vm_version.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
class VM_Version : public Abstract_VM_Version {
|
||||
friend class VMStructs;
|
||||
@ -261,7 +261,9 @@ class VM_Version : public Abstract_VM_Version {
|
||||
uint32_t : 2,
|
||||
avx512_4vnniw : 1,
|
||||
avx512_4fmaps : 1,
|
||||
: 28;
|
||||
: 10,
|
||||
serialize : 1,
|
||||
: 17;
|
||||
} bits;
|
||||
};
|
||||
|
||||
@ -359,7 +361,8 @@ protected:
|
||||
\
|
||||
decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
|
||||
decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
|
||||
decl(HV, "hv", 46) /* Hypervisor instructions */
|
||||
decl(HV, "hv", 46) /* Hypervisor instructions */ \
|
||||
decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */
|
||||
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
|
||||
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
|
||||
@ -646,6 +649,8 @@ enum Extended_Family {
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
|
||||
result |= CPU_CLWB;
|
||||
}
|
||||
if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
|
||||
result |= CPU_SERIALIZE;
|
||||
}
|
||||
|
||||
// ZX features.
|
||||
@ -896,6 +901,7 @@ public:
|
||||
// True when the CPU_AVX512_VBMI bit is set in _features.
static bool supports_avx512_vbmi() {
  return (_features & CPU_AVX512_VBMI) != 0;
}
|
||||
// True when the CPU_AVX512_VBMI2 bit is set in _features.
static bool supports_avx512_vbmi2() {
  return (_features & CPU_AVX512_VBMI2) != 0;
}
|
||||
// True when the CPU_HV bit is set in _features.
static bool supports_hv() {
  return (_features & CPU_HV) != 0;
}
|
||||
// True when the CPU_SERIALIZE bit is set in _features.
static bool supports_serialize() {
  return (_features & CPU_SERIALIZE) != 0;
}
|
||||
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
@ -1027,19 +1033,8 @@ public:
|
||||
// and trailing StoreStore fences.
|
||||
|
||||
#ifdef _LP64
|
||||
// _LP64 variant: declared here only. The body is provided out of line as
// VM_Version::supports_clflush(), which asserts on CPU_FLUSH once Universe
// initialization has finished and always returns true.
static bool supports_clflush(); // Can't inline due to header file conflict
|
||||
#else
|
||||
// Non-_LP64 variant: true when the CPU_FLUSH bit is set in _features.
static bool supports_clflush() {
  return ((_features & CPU_FLUSH) != 0);
}
|
||||
#endif // _LP64
|
||||
|
||||
@ -60,8 +60,12 @@ inline void OrderAccess::fence() {
|
||||
}
|
||||
|
||||
inline void OrderAccess::cross_modify_fence_impl() {
|
||||
int idx = 0;
|
||||
__asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
|
||||
if (VM_Version::supports_serialize()) {
|
||||
__asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : :); //serialize
|
||||
} else {
|
||||
int idx = 0;
|
||||
__asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
|
||||
}
|
||||
}
|
||||
|
||||
#endif // OS_CPU_BSD_X86_ORDERACCESS_BSD_X86_HPP
|
||||
|
||||
@ -56,14 +56,18 @@ inline void OrderAccess::fence() {
|
||||
}
|
||||
|
||||
inline void OrderAccess::cross_modify_fence_impl() {
|
||||
int idx = 0;
|
||||
if (VM_Version::supports_serialize()) {
|
||||
__asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : :); //serialize
|
||||
} else {
|
||||
int idx = 0;
|
||||
#ifdef AMD64
|
||||
__asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
|
||||
__asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory");
|
||||
#else
|
||||
// On some x86 systems EBX is a reserved register that cannot be
|
||||
// clobbered, so we must protect it around the CPUID.
|
||||
__asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory");
|
||||
// On some x86 systems EBX is a reserved register that cannot be
|
||||
// clobbered, so we must protect it around the CPUID.
|
||||
__asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#endif // OS_CPU_LINUX_X86_ORDERACCESS_LINUX_X86_HPP
|
||||
|
||||
@ -58,9 +58,22 @@ inline void OrderAccess::fence() {
|
||||
compiler_barrier();
|
||||
}
|
||||
|
||||
inline void OrderAccess::cross_modify_fence_impl() {
|
||||
inline void OrderAccess::cross_modify_fence_impl()
|
||||
#if _MSC_VER >= 1928
|
||||
{
|
||||
//_serialize() intrinsic is supported starting from VS2019-16.7.2
|
||||
if (VM_Version::supports_serialize()) {
|
||||
_serialize();
|
||||
} else {
|
||||
int regs[4];
|
||||
__cpuid(regs, 0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
{
|
||||
int regs[4];
|
||||
__cpuid(regs, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // OS_CPU_WINDOWS_X86_ORDERACCESS_WINDOWS_X86_HPP
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#define SHARE_RUNTIME_ORDERACCESS_HPP
|
||||
|
||||
#include "memory/allocation.hpp"
|
||||
#include "runtime/vm_version.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// Memory Access Ordering Model
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#ifndef SHARE_RUNTIME_VM_VERSION_HPP
|
||||
#define SHARE_RUNTIME_VM_VERSION_HPP
|
||||
|
||||
#include "runtime/globals.hpp"
|
||||
#include "utilities/macros.hpp" // for CPU_HEADER() macro.
|
||||
#include CPU_HEADER(vm_version)
|
||||
|
||||
|
||||
@ -220,6 +220,7 @@ public class AMD64 extends Architecture {
|
||||
AVX512_VBMI2,
|
||||
AVX512_VBMI,
|
||||
HV,
|
||||
SERIALIZE,
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user