From 8d899925dc281c5dabbef14d85a6df807f8d300e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20Casta=C3=B1eda=20Lozano?= Date: Wed, 26 Apr 2023 08:25:11 +0000 Subject: [PATCH] 8298189: Regression in SPECjvm2008-MonteCarlo for pre-Cascade Lake Intel processors Co-authored-by: Quan Anh Mai Reviewed-by: shade, thartmann, kvn --- src/hotspot/cpu/x86/vm_version_x86.cpp | 7 +++++-- src/hotspot/cpu/x86/vm_version_x86.hpp | 2 ++ src/hotspot/cpu/x86/x86_64.ad | 19 +++++++++++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index b194f42e195..b4e9e721b5a 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -2079,11 +2079,14 @@ bool VM_Version::is_default_intel_cascade_lake() { return FLAG_IS_DEFAULT(UseAVX) && FLAG_IS_DEFAULT(MaxVectorSize) && UseAVX > 2 && - is_intel_skylake() && - _stepping >= 5; + is_intel_cascade_lake(); } #endif +bool VM_Version::is_intel_cascade_lake() { + return is_intel_skylake() && _stepping >= 5; +} + // avx3_threshold() sets the threshold at which 64-byte instructions are used // for implementing the array copy and clear operations. // The Intel platforms that supports the serialize instruction diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 65f8c5b3cba..3074621229a 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -716,6 +716,8 @@ public: static bool is_default_intel_cascade_lake(); #endif + static bool is_intel_cascade_lake(); + static int avx3_threshold(); static bool is_intel_tsc_synched_at_init(); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 7c1eb99bf58..fd64a684674 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -13732,6 +13732,13 @@ instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift) ins_pipe(ialu_reg_reg); %} +// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec, +// sal}) with lea instructions. The {add, sal} rules are beneficial in +// processors with at least partial ALU support for lea +// (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally +// beneficial for processors with full ALU support +// (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake. + peephole %{ peeppredicate(VM_Version::supports_fast_2op_lea()); @@ -13750,7 +13757,8 @@ peephole peephole %{ - peeppredicate(VM_Version::supports_fast_2op_lea()); + peeppredicate(VM_Version::supports_fast_3op_lea() || + VM_Version::is_intel_cascade_lake()); peepmatch (incI_rReg); peepprocedure (lea_coalesce_imm); peepreplace (leaI_rReg_immI_peep()); @@ -13758,7 +13766,8 @@ peephole peephole %{ - peeppredicate(VM_Version::supports_fast_2op_lea()); + peeppredicate(VM_Version::supports_fast_3op_lea() || + VM_Version::is_intel_cascade_lake()); peepmatch (decI_rReg); peepprocedure (lea_coalesce_imm); peepreplace (leaI_rReg_immI_peep()); @@ -13790,7 +13799,8 @@ peephole peephole %{ - peeppredicate(VM_Version::supports_fast_2op_lea()); + peeppredicate(VM_Version::supports_fast_3op_lea() || + VM_Version::is_intel_cascade_lake()); peepmatch (incL_rReg); peepprocedure (lea_coalesce_imm); peepreplace (leaL_rReg_immL32_peep()); @@ -13798,7 +13808,8 @@ peephole peephole %{ - peeppredicate(VM_Version::supports_fast_2op_lea()); + peeppredicate(VM_Version::supports_fast_3op_lea() || + VM_Version::is_intel_cascade_lake()); peepmatch (decL_rReg); peepprocedure (lea_coalesce_imm); peepreplace (leaL_rReg_immL32_peep());