From ff12ff534abb2e08d1bb44a83ef4f84b8476f94c Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Fri, 15 Nov 2024 18:10:30 +0000 Subject: [PATCH] 8340453: C2: Improve encoding of LoadNKlass for compact headers Reviewed-by: rcastanedalo, mli --- src/hotspot/cpu/aarch64/aarch64.ad | 17 +++++++++-------- .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 9 --------- .../cpu/aarch64/c2_MacroAssembler_aarch64.hpp | 2 -- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 10 ---------- src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 4 ---- src/hotspot/cpu/x86/x86_64.ad | 12 ++++++++---- src/hotspot/share/oops/markWord.hpp | 2 ++ src/hotspot/share/oops/oop.hpp | 8 ++------ src/hotspot/share/opto/memnode.hpp | 6 ++++++ 9 files changed, 27 insertions(+), 43 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index a97d01b7683..1015b631643 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -5755,10 +5755,6 @@ opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indInde indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); -opclass memory_noindex(indirect, - indOffI1, indOffL1,indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8, - indirectN, indOffIN, indOffLN, indirectX2P, indOffX2P); - // iRegIorL2I is used for src inputs in rules for 32 bit int (I) // operations. it allows the src to be either an iRegI or a (ConvL2I // iRegL). 
in the latter case the l2i normally planted for a ConvL2I @@ -6695,16 +6691,21 @@ instruct loadNKlass(iRegNNoSp dst, memory4 mem) ins_pipe(iload_reg_mem); %} -instruct loadNKlassCompactHeaders(iRegNNoSp dst, memory_noindex mem) +instruct loadNKlassCompactHeaders(iRegNNoSp dst, memory4 mem) %{ match(Set dst (LoadNKlass mem)); predicate(!needs_acquiring_load(n) && UseCompactObjectHeaders); ins_cost(4 * INSN_COST); - format %{ "load_narrow_klass_compact $dst, $mem\t# compressed class ptr" %} + format %{ + "ldrw $dst, $mem\t# compressed class ptr, shifted\n\t" + "lsrw $dst, $dst, markWord::klass_shift_at_offset" + %} ins_encode %{ - assert($mem$$index$$Register == noreg, "must not have indexed address"); - __ load_narrow_klass_compact_c2($dst$$Register, $mem$$base$$Register, $mem$$disp); + // inlined aarch64_enc_ldrw + loadStore(masm, &MacroAssembler::ldrw, $dst$$Register, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4); + __ lsrw($dst$$Register, $dst$$Register, markWord::klass_shift_at_offset); %} ins_pipe(iload_reg_mem); %} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 4c22133c056..3b0c8ae432c 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -2690,12 +2690,3 @@ bool C2_MacroAssembler::in_scratch_emit_size() { } return MacroAssembler::in_scratch_emit_size(); } - -void C2_MacroAssembler::load_narrow_klass_compact_c2(Register dst, Register obj, int disp) { - // Note: Don't clobber obj anywhere in that method! - - // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract - // obj-start, so that we can load from the object's mark-word instead. 
- ldr(dst, Address(obj, disp - oopDesc::klass_offset_in_bytes())); - lsr(dst, dst, markWord::klass_shift); -} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index c6ddcf46cba..d61b050407d 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -186,6 +186,4 @@ void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero, FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T); - void load_narrow_klass_compact_c2(Register dst, Register obj, int disp); - #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 06d93ddea26..8d0af29e91d 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -7073,13 +7073,3 @@ void C2_MacroAssembler::vector_saturating_op(int ideal_opc, BasicType elem_bt, X vector_saturating_op(ideal_opc, elem_bt, dst, src1, src2, vlen_enc); } } - -#ifdef _LP64 -void C2_MacroAssembler::load_narrow_klass_compact_c2(Register dst, Address src) { - // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract - // obj-start, so that we can load from the object's mark-word instead. Usually the address - // comes as obj-start in obj and klass_offset_in_bytes in disp. 
- movq(dst, src.plus_disp(-oopDesc::klass_offset_in_bytes())); - shrq(dst, markWord::klass_shift); -} -#endif diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 523200486cc..3a36fd75e3f 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -583,8 +583,4 @@ public: void select_from_two_vectors_evex(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc); -#ifdef _LP64 - void load_narrow_klass_compact_c2(Register dst, Address src); -#endif - #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index fc083ecfa24..550c8047034 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4368,11 +4368,15 @@ instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr) match(Set dst (LoadNKlass mem)); effect(KILL cr); ins_cost(125); // XXX - format %{ "load_narrow_klass_compact $dst, $mem\t# compressed klass ptr" %} - ins_encode %{ - __ load_narrow_klass_compact_c2($dst$$Register, $mem$$Address); + format %{ + "movl $dst, $mem\t# compressed klass ptr, shifted\n\t" + "shrl $dst, markWord::klass_shift_at_offset" %} - ins_pipe(pipe_slow); // XXX + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + __ shrl($dst$$Register, markWord::klass_shift_at_offset); + %} + ins_pipe(ialu_reg_mem); // XXX %} // Load Float diff --git a/src/hotspot/share/oops/markWord.hpp b/src/hotspot/share/oops/markWord.hpp index 7d2bff1efc0..1e1b8d77a90 100644 --- a/src/hotspot/share/oops/markWord.hpp +++ b/src/hotspot/share/oops/markWord.hpp @@ -133,7 +133,9 @@ class markWord { // We store the (narrow) Klass* in the bits 43 to 64. 
// These are for bit-precise extraction of the narrow Klass* from the 64-bit Markword + static constexpr int klass_offset_in_bytes = 4; static constexpr int klass_shift = hash_shift + hash_bits; + static constexpr int klass_shift_at_offset = klass_shift - klass_offset_in_bytes * BitsPerByte; static constexpr int klass_bits = 22; static constexpr uintptr_t klass_mask = right_n_bits(klass_bits); static constexpr uintptr_t klass_mask_in_place = klass_mask << klass_shift; diff --git a/src/hotspot/share/oops/oop.hpp b/src/hotspot/share/oops/oop.hpp index dcf42c7343b..18e421d620c 100644 --- a/src/hotspot/share/oops/oop.hpp +++ b/src/hotspot/share/oops/oop.hpp @@ -332,12 +332,8 @@ class oopDesc { #ifdef _LP64 if (UseCompactObjectHeaders) { // NOTE: The only places where this is used with compact headers are the C2 - // compiler and JVMCI, and even there we don't use it to access the (narrow)Klass* - // directly. It is used only as a placeholder to identify the special memory slice - // containing Klass* info. This value could be any value that is not a valid - // field offset. Use an offset halfway into the markWord, as the markWord is never - // partially loaded from C2 and JVMCI. - return mark_offset_in_bytes() + 4; + // compiler and JVMCI. + return mark_offset_in_bytes() + markWord::klass_offset_in_bytes; } else #endif { diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp index 1ca3a4b16ce..83ac80c043f 100644 --- a/src/hotspot/share/opto/memnode.hpp +++ b/src/hotspot/share/opto/memnode.hpp @@ -541,6 +541,12 @@ public: //------------------------------LoadNKlassNode--------------------------------- // Load a narrow Klass from an object. +// With compact headers, the input address (adr) does not point at the exact +// header position where the (narrow) class pointer is located, but into the +// middle of the mark word (see oopDesc::klass_offset_in_bytes()). 
This node +// implicitly shifts the loaded value right (by markWord::klass_shift_at_offset bits) to +// extract the actual class pointer. C2's type system is agnostic on whether the +// input address directly points into the class pointer. class LoadNKlassNode : public LoadNNode { public: LoadNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk, MemOrd mo)