From ab241b3428839fd121ee4ce5fdafeb649f453550 Mon Sep 17 00:00:00 2001
From: Quan Anh Mai
Date: Tue, 23 May 2023 17:06:25 +0000
Subject: [PATCH] 8306706: Support out-of-line code generation for MachNodes

Reviewed-by: thartmann, kvn
---
 src/hotspot/cpu/x86/assembler_x86.hpp         |  5 +-
 src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 50 ++++++++++
 src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp |  4 +
 src/hotspot/cpu/x86/x86_64.ad                 | 16 ++--
 .../share/metaprogramming/enableIf.hpp        |  1 -
 src/hotspot/share/opto/c2_CodeStubs.cpp       |  7 ++
 src/hotspot/share/opto/c2_CodeStubs.hpp       | 60 ++++++++++++
 src/hotspot/share/utilities/tuple.hpp         | 57 ++++++++++++
 .../bench/vm/compiler/x86/ConvertF2I.java     | 91 +++++++++++++++++++
 9 files changed, 278 insertions(+), 13 deletions(-)
 create mode 100644 src/hotspot/share/utilities/tuple.hpp
 create mode 100644 test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java

diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index d551442ecf2..1c5f843f3b7 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -213,10 +213,7 @@ class Address {
       _isxmmindex(false){
   }
 
-  // No default displacement otherwise Register can be implicitly
-  // converted to 0(Register) which is quite a different animal.
-
-  Address(Register base, int disp)
+  explicit Address(Register base, int disp = 0)  // explicit: a Register must never convert implicitly to 0(Register)
     : _base(base),
       _index(noreg),
       _xmmindex(xnoreg),
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index f77165bf5a7..0dc02d9f07d 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -4293,6 +4293,56 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
   }
 }
 
+#ifdef _LP64
+
+static void convertF2I_slowpath(C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister, address>& stub) {
+#define __ masm.
+  Register dst = stub.data<0>();       // stub payload, in the order it was passed to C2CodeStub::make
+  XMMRegister src = stub.data<1>();
+  address target = stub.data<2>();
+  __ bind(stub.entry());
+  __ subptr(rsp, 8);                   // reserve an 8-byte stack slot
+  __ movdbl(Address(rsp), src);        // store src into that slot
+  __ call(RuntimeAddress(target));     // call the fixup routine selected by convertF2I
+  __ pop(dst);                         // pop the slot into dst (presumably rewritten by the fixup routine - confirm)
+  __ jmp(stub.continuation());         // resume the main code path
+#undef __
+}
+
+void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src) {
+  assert(dst_bt == T_INT || dst_bt == T_LONG, "");
+  assert(src_bt == T_FLOAT || src_bt == T_DOUBLE, "");
+
+  address slowpath_target;             // fixup routine matching the (src_bt, dst_bt) pair
+  if (dst_bt == T_INT) {
+    if (src_bt == T_FLOAT) {
+      cvttss2sil(dst, src);
+      cmpl(dst, 0x80000000);           // sets the flags tested by the jcc below
+      slowpath_target = StubRoutines::x86::f2i_fixup();
+    } else {
+      cvttsd2sil(dst, src);
+      cmpl(dst, 0x80000000);
+      slowpath_target = StubRoutines::x86::d2i_fixup();
+    }
+  } else {
+    if (src_bt == T_FLOAT) {
+      cvttss2siq(dst, src);
+      cmp64(dst, ExternalAddress(StubRoutines::x86::double_sign_flip()));
+      slowpath_target = StubRoutines::x86::f2l_fixup();
+    } else {
+      cvttsd2siq(dst, src);
+      cmp64(dst, ExternalAddress(StubRoutines::x86::double_sign_flip()));
+      slowpath_target = StubRoutines::x86::d2l_fixup();
+    }
+  }
+
+  auto stub = C2CodeStub::make<Register, XMMRegister, address>(dst, src, slowpath_target, 23, convertF2I_slowpath);  // 23 = max_size of the stub
+  jcc(Assembler::equal, stub->entry());    // go out of line only when the compare above matched
+  bind(stub->continuation());              // convertF2I_slowpath jumps back to this point
+}
+
+#endif // _LP64
+
 void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                                     XMMRegister src1, int imm8, bool merge, int vlen_enc) {
   switch(ideal_opc) {
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index 952d99e4f77..e9e1412957b 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -304,6 +304,10 @@ public:
   void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
   void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
 
+#ifdef _LP64
+  void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);  // T_FLOAT/T_DOUBLE -> T_INT/T_LONG
+#endif
+
   void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                    XMMRegister dst, XMMRegister src1, XMMRegister src2,
                    bool merge, int vlen_enc, bool is_varshift = false);
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index 218cd46a1c4..ef0e7a9bea4 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -11020,9 +11020,9 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
 %{
   match(Set dst (ConvF2I src));
   effect(KILL cr);
-  format %{ "convert_f2i $dst,$src" %}
+  format %{ "convert_f2i $dst, $src" %}
   ins_encode %{
-    __ convert_f2i($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -11031,9 +11031,9 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
 %{
   match(Set dst (ConvF2L src));
   effect(KILL cr);
-  format %{ "convert_f2l $dst,$src"%}
+  format %{ "convert_f2l $dst, $src"%}
   ins_encode %{
-    __ convert_f2l($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -11042,9 +11042,9 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
 %{
   match(Set dst (ConvD2I src));
   effect(KILL cr);
-  format %{ "convert_d2i $dst,$src"%}
+  format %{ "convert_d2i $dst, $src"%}
   ins_encode %{
-    __ convert_d2i($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -11053,9 +11053,9 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
 %{
   match(Set dst (ConvD2L src));
   effect(KILL cr);
-  format %{ "convert_d2l $dst,$src"%}
+  format %{ "convert_d2l $dst, $src"%}
   ins_encode %{
-    __ convert_d2l($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
   %}
   ins_pipe(pipe_slow);
 %}
diff --git a/src/hotspot/share/metaprogramming/enableIf.hpp b/src/hotspot/share/metaprogramming/enableIf.hpp
index 66ab39944b7..73dce0dd120 100644
--- a/src/hotspot/share/metaprogramming/enableIf.hpp
+++ b/src/hotspot/share/metaprogramming/enableIf.hpp
@@ -25,7 +25,6 @@
 #ifndef SHARE_METAPROGRAMMING_ENABLEIF_HPP
 #define SHARE_METAPROGRAMMING_ENABLEIF_HPP
 
-#include "metaprogramming/logical.hpp"
 #include <type_traits>
 
 // Retained temporarily for backward compatibility.
diff --git a/src/hotspot/share/opto/c2_CodeStubs.cpp b/src/hotspot/share/opto/c2_CodeStubs.cpp
index 8f4ef1b282a..f96f086d7a1 100644
--- a/src/hotspot/share/opto/c2_CodeStubs.cpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.cpp
@@ -52,3 +52,10 @@ void C2CodeStubList::emit(CodeBuffer& cb) {
     assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size);
   }
 }
+
+// Defined here rather than in c2_CodeStubs.hpp to avoid a circular dependency between c2_CodeStubs.hpp and output.hpp
+void C2CodeStub::add_to_stub_list() {
+  if (!Compile::current()->output()->in_scratch_emit_size()) {  // register only when output() is not in_scratch_emit_size()
+    Compile::current()->output()->add_stub(this);
+  }
+}
diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp
index 8af906d0542..3df1e4b726d 100644
--- a/src/hotspot/share/opto/c2_CodeStubs.hpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.hpp
@@ -26,16 +26,23 @@
 #include "asm/codeBuffer.hpp"
 #include "memory/allocation.hpp"
 #include "opto/c2_MacroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/output.hpp"
 #include "utilities/growableArray.hpp"
+#include "utilities/tuple.hpp"
 
 #ifndef SHARE_OPTO_C2_CODESTUBS_HPP
 #define SHARE_OPTO_C2_CODESTUBS_HPP
 
+template <class... Ts>
+class C2GeneralStub;
+
 class C2CodeStub : public ArenaObj {
 private:
   Label _entry;
   Label _continuation;
 
+  void add_to_stub_list();  // called by make(); defined in c2_CodeStubs.cpp
 protected:
   C2CodeStub() :
     _entry(),
@@ -47,6 +54,10 @@ public:
 
   virtual void emit(C2_MacroAssembler& masm) = 0;
   virtual int max_size() const = 0;
+
+  template <class... Ts>
+  static C2GeneralStub<Ts...>* make(const Ts&... data, int max_size,
+                                    void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&));
 };
 
 class C2CodeStubList {
@@ -101,4 +112,53 @@ public:
 };
 #endif
 
+//-----------------------------C2GeneralStub-----------------------------------
+// A generalized stub that can be used to implement an arbitrary stub in a
+// type-safe manner. An example:
+//
+// Register dst; XMMRegister src;
+// // The lambda defining how the code is emitted in the stub
+// auto slowpath = [](C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister>& stub) {
+//   // Access the saved data in a type safe manner
+//   Register dst = stub.data<0>();
+//   XMMRegister src = stub.data<1>();
+//   masm.bind(stub.entry());
+//   ...
+//   masm.jump(stub.continuation());
+// };
+// // Create a stub with 2 data fields being dst and src, a max size of 4 bytes
+// // and predefined emission function
+// auto stub = C2CodeStub::make<Register, XMMRegister>(dst, src, 4, slowpath);
+// __ jump_conditional(stub->entry());
+// ...
+// __ bind(stub->continuation());
+//
+template <class... Ts>
+class C2GeneralStub : public C2CodeStub {
+private:
+  Tuple<Ts...> _data;  // values captured by make(), read back through data<I>()
+  int _max_size;
+  void (*_emit)(C2_MacroAssembler&, C2GeneralStub&);
+
+  constexpr C2GeneralStub(const Ts&... data, int max_size,
+                          void (*emit)(C2_MacroAssembler&, C2GeneralStub&))
+    : _data(data...), _max_size(max_size), _emit(emit) {}
+
+  friend C2CodeStub;  // the private constructor is reached only through C2CodeStub::make
+public:
+  template <std::size_t I>
+  constexpr const auto& data() const { return _data.template get<I>(); }
+
+  int max_size() const { return _max_size; }
+  void emit(C2_MacroAssembler& masm) { _emit(masm, *this); }
+};
+
+template <class... Ts>
+C2GeneralStub<Ts...>* C2CodeStub::make(const Ts&... data, int max_size,
+                                       void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&)) {
+  auto stub = new (Compile::current()->comp_arena()) C2GeneralStub<Ts...>(data..., max_size, emit);
+  stub->add_to_stub_list();
+  return stub;
+}
+
 #endif // SHARE_OPTO_C2_CODESTUBS_HPP
diff --git a/src/hotspot/share/utilities/tuple.hpp b/src/hotspot/share/utilities/tuple.hpp
new file mode 100644
index 00000000000..1b589a0c204
--- /dev/null
+++ b/src/hotspot/share/utilities/tuple.hpp
@@ -0,0 +1,57 @@
+/*
+* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#ifndef SHARE_UTILITIES_TUPLE_HPP
+#define SHARE_UTILITIES_TUPLE_HPP
+
+#include <type_traits>
+
+template <class... Ts>
+class Tuple;
+
+template <>
+class Tuple<> {};  // empty tuple: ends the recursion of the specialization below
+
+template <class T, class... Ts>
+class Tuple<T, Ts...> {
+private:
+  T _first;                 // element 0
+  Tuple<Ts...> _remaining;  // all further elements, stored as a nested Tuple
+
+public:
+  constexpr Tuple(const T& first, const Ts&... remaining) noexcept
+    : _first(first), _remaining(remaining...) {}
+
+  template <std::size_t I, std::enable_if_t<(I > 0), int> = 0>
+  constexpr const auto& get() const noexcept {
+    return _remaining.template get<I - 1>();  // I > 0: delegate to the tail with the index shifted down by one
+  }
+
+  template <std::size_t I, std::enable_if_t<I == 0, int> = 0>
+  constexpr const T& get() const noexcept {
+    return _first;  // I == 0: the element held by this node
+  }
+};
+
+#endif // SHARE_UTILITIES_TUPLE_HPP
diff --git a/test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java b/test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java
new file mode 100644
index 00000000000..61d375f4d82
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler.x86;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 1)
+@Fork(value = 1, jvmArgsAppend = {"-XX:-UseSuperWord"}) // NOTE(review): SuperWord off, presumably to keep the array loops scalar - confirm
+public class ConvertF2I { // JMH benchmark of float/double -> int/long casts, as single values and over arrays
+    static final int LENGTH = 1000;
+    static final int[] INT_ARRAY = new int[LENGTH];          // destination of f2iArray / d2iArray
+    static final long[] LONG_ARRAY = new long[LENGTH];       // destination of f2lArray / d2lArray
+    static final float[] FLOAT_ARRAY = new float[LENGTH];    // never written in this class: every element is 0.0f
+    static final double[] DOUBLE_ARRAY = new double[LENGTH]; // never written in this class: every element is 0.0
+    float f;  // never assigned in this class: stays 0.0f
+    double d; // never assigned in this class: stays 0.0
+
+    @Benchmark
+    public int f2iSingle() {
+        return (int)f;
+    }
+
+    @Benchmark
+    public long f2lSingle() {
+        return (long)f;
+    }
+
+    @Benchmark
+    public int d2iSingle() {
+        return (int)d;
+    }
+
+    @Benchmark
+    public long d2lSingle() {
+        return (long)d;
+    }
+
+    @Benchmark
+    public void f2iArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            INT_ARRAY[i] = (int)FLOAT_ARRAY[i];
+        }
+    }
+
+    @Benchmark
+    public void f2lArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            LONG_ARRAY[i] = (long)FLOAT_ARRAY[i];
+        }
+    }
+
+    @Benchmark
+    public void d2iArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            INT_ARRAY[i] = (int)DOUBLE_ARRAY[i];
+        }
+    }
+
+    @Benchmark
+    public void d2lArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            LONG_ARRAY[i] = (long)DOUBLE_ARRAY[i];
+        }
+    }
+}