8306706: Support out-of-line code generation for MachNodes

Reviewed-by: thartmann, kvn
This commit is contained in:
Quan Anh Mai 2023-05-23 17:06:25 +00:00
parent 710453c676
commit ab241b3428
9 changed files with 278 additions and 13 deletions

View File

@ -213,10 +213,7 @@ class Address {
_isxmmindex(false){
}
// No default displacement otherwise Register can be implicitly
// converted to 0(Register) which is quite a different animal.
Address(Register base, int disp)
explicit Address(Register base, int disp = 0)
: _base(base),
_index(noreg),
_xmmindex(xnoreg),

View File

@ -4293,6 +4293,56 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
}
}
#ifdef _LP64
// Out-of-line slow path emitted for C2_MacroAssembler::convertF2I.
//
// The stub carries three values: the destination register, the source XMM
// register and the address of the runtime fixup routine to call. The emitted
// code reserves an 8-byte stack slot, stores the source value into it
// (always with a 64-bit movdbl, whatever the source type was), calls the
// fixup routine, pops the slot into the destination register and jumps back
// to the continuation label in the main code.
// NOTE(review): presumably the fixup routine reads its argument from, and
// writes its result to, that stack slot - confirm against the stub routines.
static void convertF2I_slowpath(C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister, address>& stub) {
  const Register result = stub.data<0>();
  const XMMRegister input = stub.data<1>();
  const address fixup_routine = stub.data<2>();

  masm.bind(stub.entry());
  // Make room for one 8-byte slot and spill the input value into it.
  masm.subptr(rsp, 8);
  masm.movdbl(Address(rsp), input);
  masm.call(RuntimeAddress(fixup_routine));
  // Reload the slot into the destination and release it in one instruction.
  masm.pop(result);
  masm.jmp(stub.continuation());
}
// Emits a truncating conversion of the float/double in src to an int/long
// in dst.
//
//   dst_bt - T_INT or T_LONG (asserted)
//   src_bt - T_FLOAT or T_DOUBLE (asserted)
//
// The fast path is a single cvtt* instruction followed by a compare of the
// result with a sentinel (0x80000000 for int results, the 64-bit value at
// StubRoutines::x86::double_sign_flip() for long results). When they are
// equal, control branches to an out-of-line stub (convertF2I_slowpath) that
// calls the matching *_fixup routine and then returns to the continuation
// bound at the end of this sequence.
// NOTE(review): the sentinel is presumably the x86 "integer indefinite"
// value the cvtt* instructions produce for NaN / out-of-range inputs.
void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src) {
  assert(dst_bt == T_INT || dst_bt == T_LONG, "");
  assert(src_bt == T_FLOAT || src_bt == T_DOUBLE, "");

  const bool from_float = (src_bt == T_FLOAT);
  address fixup_routine;

  if (dst_bt == T_INT) {
    if (from_float) {
      cvttss2sil(dst, src);
    } else {
      cvttsd2sil(dst, src);
    }
    cmpl(dst, 0x80000000);
    fixup_routine = from_float ? StubRoutines::x86::f2i_fixup()
                               : StubRoutines::x86::d2i_fixup();
  } else {
    if (from_float) {
      cvttss2siq(dst, src);
    } else {
      cvttsd2siq(dst, src);
    }
    cmp64(dst, ExternalAddress(StubRoutines::x86::double_sign_flip()));
    fixup_routine = from_float ? StubRoutines::x86::f2l_fixup()
                               : StubRoutines::x86::d2l_fixup();
  }

  // Upper bound, in bytes, on the size of the code emitted by the slow path.
  const int slowpath_max_size = 23;
  auto slowpath = C2CodeStub::make<Register, XMMRegister, address>(dst, src, fixup_routine,
                                                                   slowpath_max_size, convertF2I_slowpath);
  jcc(Assembler::equal, slowpath->entry());
  bind(slowpath->continuation());
}
#endif // _LP64
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
XMMRegister src1, int imm8, bool merge, int vlen_enc) {
switch(ideal_opc) {

View File

@ -304,6 +304,10 @@ public:
void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);
#ifdef _LP64
// Emits code that converts the float/double held in src to an int/long in dst.
// dst_bt must be T_INT or T_LONG and src_bt must be T_FLOAT or T_DOUBLE; the
// definition asserts both. Only available in 64-bit (_LP64) builds.
void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
#endif
void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
XMMRegister dst, XMMRegister src1, XMMRegister src2,
bool merge, int vlen_enc, bool is_varshift = false);

View File

@ -11020,9 +11020,9 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
match(Set dst (ConvF2I src));
effect(KILL cr);
format %{ "convert_f2i $dst,$src" %}
format %{ "convert_f2i $dst, $src" %}
ins_encode %{
__ convert_f2i($dst$$Register, $src$$XMMRegister);
__ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -11031,9 +11031,9 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
match(Set dst (ConvF2L src));
effect(KILL cr);
format %{ "convert_f2l $dst,$src"%}
format %{ "convert_f2l $dst, $src"%}
ins_encode %{
__ convert_f2l($dst$$Register, $src$$XMMRegister);
__ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -11042,9 +11042,9 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
match(Set dst (ConvD2I src));
effect(KILL cr);
format %{ "convert_d2i $dst,$src"%}
format %{ "convert_d2i $dst, $src"%}
ins_encode %{
__ convert_d2i($dst$$Register, $src$$XMMRegister);
__ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -11053,9 +11053,9 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
match(Set dst (ConvD2L src));
effect(KILL cr);
format %{ "convert_d2l $dst,$src"%}
format %{ "convert_d2l $dst, $src"%}
ins_encode %{
__ convert_d2l($dst$$Register, $src$$XMMRegister);
__ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}

View File

@ -25,7 +25,6 @@
#ifndef SHARE_METAPROGRAMMING_ENABLEIF_HPP
#define SHARE_METAPROGRAMMING_ENABLEIF_HPP
#include "metaprogramming/logical.hpp"
#include <type_traits>
// Retained temporarily for backward compatibility.

View File

@ -52,3 +52,10 @@ void C2CodeStubList::emit(CodeBuffer& cb) {
assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size);
}
}
// Registers this stub with the output phase of the current compilation so
// that it is emitted later. Nothing is registered while the output phase
// reports in_scratch_emit_size().
// Defined here rather than in the header to avoid a circular dependency
// between c2_CodeStubs.hpp and output.hpp.
void C2CodeStub::add_to_stub_list() {
  auto* const output = Compile::current()->output();
  if (output->in_scratch_emit_size()) {
    return;
  }
  output->add_stub(this);
}

View File

@ -26,16 +26,23 @@
#include "asm/codeBuffer.hpp"
#include "memory/allocation.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/compile.hpp"
#include "opto/output.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/tuple.hpp"
#ifndef SHARE_OPTO_C2_CODESTUBS_HPP
#define SHARE_OPTO_C2_CODESTUBS_HPP
template <class... Ts>
class C2GeneralStub;
class C2CodeStub : public ArenaObj {
private:
Label _entry;
Label _continuation;
void add_to_stub_list();
protected:
C2CodeStub() :
_entry(),
@ -47,6 +54,10 @@ public:
virtual void emit(C2_MacroAssembler& masm) = 0;
virtual int max_size() const = 0;
template <class... Ts>
static C2GeneralStub<Ts...>* make(const Ts&... data, int max_size,
void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&));
};
class C2CodeStubList {
@ -101,4 +112,53 @@ public:
};
#endif
//-----------------------------C2GeneralStub-----------------------------------
// A generalized stub that can be used to implement an arbitrary stub in a
// type-safe manner. An example:
//
// Register dst; XMMRegister src;
// // The lambda defining how the code is emitted in the stub
// auto slowpath = [](C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister>& stub) {
// // Access the saved data in a type safe manner
// Register dst = stub.data<0>();
// XMMRegister src = stub.data<1>();
// masm.bind(stub.entry());
// ...
// masm.jump(stub.continuation());
// };
// // Create a stub with 2 data fields being dst and src, a max size of 4 bytes
// // and predefined emission function
// auto stub = C2CodeStub::make<Register, XMMRegister>(dst, src, 4, slowpath);
// __ jump_conditional(stub->entry());
// ...
// __ bind(stub->continuation());
//
template <class... Ts>
class C2GeneralStub : public C2CodeStub {
private:
Tuple<Ts...> _data;
int _max_size;
void (*_emit)(C2_MacroAssembler&, C2GeneralStub&);
constexpr C2GeneralStub(const Ts&... data, int max_size,
void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&))
: _data(data...), _max_size(max_size), _emit(emit) {}
friend C2CodeStub;
public:
template <std::size_t I>
constexpr const auto& data() const { return _data.template get<I>(); }
int max_size() const { return _max_size; }
void emit(C2_MacroAssembler& masm) { _emit(masm, *this); }
};
// Creates a C2GeneralStub holding a copy of each value in data, together with
// max_size and the emit callback, then passes it to add_to_stub_list().
//
// The stub is allocated in the arena returned by
// Compile::current()->comp_arena(). Because the parameter pack is not the
// last parameter it cannot be deduced: callers spell out Ts explicitly, e.g.
// C2CodeStub::make<Register, XMMRegister>(dst, src, size, emit_fn).
template <class... Ts>
C2GeneralStub<Ts...>* C2CodeStub::make(const Ts&... data, int max_size,
                                       void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&)) {
  auto* const arena = Compile::current()->comp_arena();
  C2GeneralStub<Ts...>* const new_stub = new (arena) C2GeneralStub<Ts...>(data..., max_size, emit);
  new_stub->add_to_stub_list();
  return new_stub;
}
#endif // SHARE_OPTO_C2_CODESTUBS_HPP

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_UTILITIES_TUPLE_HPP
#define SHARE_UTILITIES_TUPLE_HPP
#include <type_traits>
// A minimal heterogeneous tuple.
//
// Tuple<T0, T1, ...> stores one value of each listed type and gives read-only,
// index-based access through get<I>(), where I is a zero-based compile-time
// index. It is built recursively: each level stores the first element and a
// Tuple of the remaining ones; Tuple<> terminates the recursion.
template <class... Ts>
class Tuple;

// The empty tuple: holds nothing and has no get().
template <>
class Tuple<> {};

template <class T, class... Ts>
class Tuple<T, Ts...> {
private:
  T _first;
  Tuple<Ts...> _remaining;

public:
  // Copies each argument into the corresponding element.
  constexpr Tuple(const T& first, const Ts&... remaining) noexcept
    : _first(first), _remaining(remaining...) {}

  // I > 0: forward to the tail with the index shifted down by one.
  template <std::size_t I, std::enable_if_t<(I > 0), int> = 0>
  constexpr const auto& get() const noexcept {
    // Valid indices at this level are 0 .. sizeof...(Ts); fail with a clear
    // message instead of an error from deep inside the recursion.
    static_assert(I <= sizeof...(Ts), "Tuple index out of range");
    return _remaining.template get<I - 1>();
  }

  // I == 0: the element stored at this level.
  template <std::size_t I, std::enable_if_t<I == 0, int> = 0>
  constexpr const T& get() const noexcept {
    return _first;
  }
};
#endif // SHARE_UTILITIES_TUPLE_HPP

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler.x86;
import org.openjdk.jmh.annotations.*;
import java.util.concurrent.TimeUnit;
/**
 * JMH microbenchmark for the Java narrowing casts float/double to int/long.
 *
 * Results are reported as average time per operation in nanoseconds. Each
 * conversion is measured in two shapes: a single cast of an instance field
 * ("Single") and a loop of LENGTH casts from one static array into another
 * ("Array").
 *
 * NOTE(review): the forked JVM is started with -XX:-UseSuperWord, presumably
 * so the array loops stay scalar and measure the scalar conversion code -
 * confirm.
 * NOTE(review): none of the inputs are assigned in this class, so the fields
 * and array elements hold the Java default value 0 and every conversion sees
 * the same input - confirm this is the intended workload.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsAppend = {"-XX:-UseSuperWord"})
public class ConvertF2I {
// Number of elements converted per invocation of the array benchmarks.
static final int LENGTH = 1000;
// Destination arrays written by the array benchmarks.
static final int[] INT_ARRAY = new int[LENGTH];
static final long[] LONG_ARRAY = new long[LENGTH];
// Source arrays read by the array benchmarks.
static final float[] FLOAT_ARRAY = new float[LENGTH];
static final double[] DOUBLE_ARRAY = new double[LENGTH];
// Inputs of the single-conversion benchmarks.
float f;
double d;
/** Converts the float field to int and returns the result. */
@Benchmark
public int f2iSingle() {
return (int)f;
}
/** Converts the float field to long and returns the result. */
@Benchmark
public long f2lSingle() {
return (long)f;
}
/** Converts the double field to int and returns the result. */
@Benchmark
public int d2iSingle() {
return (int)d;
}
/** Converts the double field to long and returns the result. */
@Benchmark
public long d2lSingle() {
return (long)d;
}
/** Converts every element of FLOAT_ARRAY to int, storing into INT_ARRAY. */
@Benchmark
public void f2iArray() {
for (int i = 0; i < LENGTH; i++) {
INT_ARRAY[i] = (int)FLOAT_ARRAY[i];
}
}
/** Converts every element of FLOAT_ARRAY to long, storing into LONG_ARRAY. */
@Benchmark
public void f2lArray() {
for (int i = 0; i < LENGTH; i++) {
LONG_ARRAY[i] = (long)FLOAT_ARRAY[i];
}
}
/** Converts every element of DOUBLE_ARRAY to int, storing into INT_ARRAY. */
@Benchmark
public void d2iArray() {
for (int i = 0; i < LENGTH; i++) {
INT_ARRAY[i] = (int)DOUBLE_ARRAY[i];
}
}
/** Converts every element of DOUBLE_ARRAY to long, storing into LONG_ARRAY. */
@Benchmark
public void d2lArray() {
for (int i = 0; i < LENGTH; i++) {
LONG_ARRAY[i] = (long)DOUBLE_ARRAY[i];
}
}
}