8306706: Support out-of-line code generation for MachNodes

Reviewed-by: thartmann, kvn
2026-06-16 15:33:15 +00:00 · 2023-05-23 17:06:25 +00:00 · 2023-05-23 17:06:25 +00:00 · ab241b3428
commit ab241b3428
parent 710453c676
9 changed files with 278 additions and 13 deletions
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@ -213,10 +213,7 @@ class Address {
      _isxmmindex(false){
  }

-  // No default displacement otherwise Register can be implicitly
-  // converted to 0(Register) which is quite a different animal.
-
-  Address(Register base, int disp)
+  explicit Address(Register base, int disp = 0)
    : _base(base),
      _index(noreg),
      _xmmindex(xnoreg),
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@ -4293,6 +4293,56 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
  }
 }

+#ifdef _LP64
+
+static void convertF2I_slowpath(C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister, address>& stub) { // out-of-line slow path emitted for convertF2I()
+#define __ masm.
+  Register dst = stub.data<0>(); // data fields are in the order passed to C2CodeStub::make(): dst, src, fixup target
+  XMMRegister src = stub.data<1>();
+  address target = stub.data<2>();
+  __ bind(stub.entry()); // the jcc in convertF2I() branches here
+  __ subptr(rsp, 8); // make room for one 8-byte slot on the stack
+  __ movdbl(Address(rsp), src); // store src into that slot; Address(rsp) uses the default displacement of 0
+  __ call(RuntimeAddress(target)); // NOTE(review): presumably the fixup routine replaces the slot with the corrected integer result - confirm against StubRoutines
+  __ pop(dst); // load the slot into dst and release it
+  __ jmp(stub.continuation()); // resume right after the inline fast path
+#undef __
+}
+
+void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src) { // inline float/double -> int/long conversion with an out-of-line fixup stub
+  assert(dst_bt == T_INT || dst_bt == T_LONG, ""); // only int and long destinations are handled
+  assert(src_bt == T_FLOAT || src_bt == T_DOUBLE, ""); // only float and double sources are handled
+
+  address slowpath_target; // fixup routine matching the (src_bt, dst_bt) pair; assigned on every path below
+  if (dst_bt == T_INT) {
+    if (src_bt == T_FLOAT) {
+      cvttss2sil(dst, src);
+      cmpl(dst, 0x80000000); // NOTE(review): presumably the value the cvtt* instruction yields for NaN/out-of-range input - confirm in the Intel SDM
+      slowpath_target = StubRoutines::x86::f2i_fixup();
+    } else {
+      cvttsd2sil(dst, src);
+      cmpl(dst, 0x80000000); // same sentinel check as the float case
+      slowpath_target = StubRoutines::x86::d2i_fixup();
+    }
+  } else {
+    if (src_bt == T_FLOAT) {
+      cvttss2siq(dst, src);
+      cmp64(dst, ExternalAddress(StubRoutines::x86::double_sign_flip())); // 64-bit sentinel is compared against the constant stored at double_sign_flip()
+      slowpath_target = StubRoutines::x86::f2l_fixup();
+    } else {
+      cvttsd2siq(dst, src);
+      cmp64(dst, ExternalAddress(StubRoutines::x86::double_sign_flip())); // same sentinel check as the float case
+      slowpath_target = StubRoutines::x86::d2l_fixup();
+    }
+  }
+
+  auto stub = C2CodeStub::make<Register, XMMRegister, address>(dst, src, slowpath_target, 23, convertF2I_slowpath); // 23 is the declared max_size of the stub code
+  jcc(Assembler::equal, stub->entry()); // taken only when dst compared equal to the sentinel above
+  bind(stub->continuation()); // convertF2I_slowpath jumps back to this point
+}
+
+#endif // _LP64
+
 void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                                    XMMRegister src1, int imm8, bool merge, int vlen_enc) {
  switch(ideal_opc) {
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@ -304,6 +304,10 @@ public:
  void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype);
  void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype);

+#ifdef _LP64
+  void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src);
+#endif
+
  void evmasked_op(int ideal_opc, BasicType eType, KRegister mask,
                   XMMRegister dst, XMMRegister src1, XMMRegister src2,
                   bool merge, int vlen_enc, bool is_varshift = false);
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@ -11020,9 +11020,9 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
 %{
  match(Set dst (ConvF2I src));
  effect(KILL cr);
-  format %{ "convert_f2i $dst,$src" %}
+  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
-    __ convert_f2i($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
 %}
@ -11031,9 +11031,9 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
 %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);
-  format %{ "convert_f2l $dst,$src"%}
+  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
-    __ convert_f2l($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
 %}
@ -11042,9 +11042,9 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
 %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);
-  format %{ "convert_d2i $dst,$src"%}
+  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
-    __ convert_d2i($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
 %}
@ -11053,9 +11053,9 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
 %{
  match(Set dst (ConvD2L src));
  effect(KILL cr);
-  format %{ "convert_d2l $dst,$src"%}
+  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
-    __ convert_d2l($dst$$Register, $src$$XMMRegister);
+    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
 %}
--- a/src/hotspot/share/metaprogramming/enableIf.hpp
+++ b/src/hotspot/share/metaprogramming/enableIf.hpp
@ -25,7 +25,6 @@
 #ifndef SHARE_METAPROGRAMMING_ENABLEIF_HPP
 #define SHARE_METAPROGRAMMING_ENABLEIF_HPP

-#include "metaprogramming/logical.hpp"
 #include <type_traits>

 // Retained temporarily for backward compatibility.
--- a/src/hotspot/share/opto/c2_CodeStubs.cpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.cpp
@ -52,3 +52,10 @@ void C2CodeStubList::emit(CodeBuffer& cb) {
    assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size);
  }
 }
+
+// Defined here rather than in the header to avoid a circular dependency between c2_CodeStubs.hpp and output.hpp.
+void C2CodeStub::add_to_stub_list() {
+  if (!Compile::current()->output()->in_scratch_emit_size()) { // do not register the stub while in_scratch_emit_size() reports true
+    Compile::current()->output()->add_stub(this); // hand the stub to the current compilation's output phase
+  }
+}
--- a/src/hotspot/share/opto/c2_CodeStubs.hpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.hpp
@ -26,16 +26,23 @@
 #include "asm/codeBuffer.hpp"
 #include "memory/allocation.hpp"
 #include "opto/c2_MacroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/output.hpp"
 #include "utilities/growableArray.hpp"
+#include "utilities/tuple.hpp"

 #ifndef SHARE_OPTO_C2_CODESTUBS_HPP
 #define SHARE_OPTO_C2_CODESTUBS_HPP

+template <class... Ts>
+class C2GeneralStub;
+
 class C2CodeStub : public ArenaObj {
 private:
  Label _entry;
  Label _continuation;

+  void add_to_stub_list();
 protected:
  C2CodeStub() :
    _entry(),
@ -47,6 +54,10 @@ public:

  virtual void emit(C2_MacroAssembler& masm) = 0;
  virtual int max_size() const = 0;
+
+  template <class... Ts>
+  static C2GeneralStub<Ts...>* make(const Ts&... data, int max_size,
+                                    void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&));
 };

 class C2CodeStubList {
@ -101,4 +112,53 @@ public:
 };
 #endif

+//-----------------------------C2GeneralStub-----------------------------------
+// A generalized stub that can be used to implement an arbitrary stub in a
+// type-safe manner. An example:
+//
+// Register dst; XMMRegister src;
+// // The lambda defining how the code is emitted in the stub
+// auto slowpath = [](C2_MacroAssembler& masm, C2GeneralStub<Register, XMMRegister>& stub) {
+//   // Access the saved data in a type-safe manner
+//   Register dst = stub.data<0>();
+//   XMMRegister src = stub.data<1>();
+//   masm.bind(stub.entry());
+//   ...
+//   masm.jump(stub.continuation());
+// };
+// // Create a stub with 2 data fields being dst and src, a max size of 4 bytes
+// // and a predefined emission function
+// auto stub = C2CodeStub::make<Register, XMMRegister>(dst, src, 4, slowpath);
+// __ jump_conditional(stub->entry());
+// ...
+// __ bind(stub->continuation());
+//
+template <class... Ts>
+class C2GeneralStub : public C2CodeStub {
+private:
+  Tuple<Ts...> _data; // copies of the values passed to make(), retrieved by index via data<I>()
+  int _max_size; // value reported by max_size()
+  void (*_emit)(C2_MacroAssembler&, C2GeneralStub&); // emission callback invoked by emit()
+
+  constexpr C2GeneralStub(const Ts&... data, int max_size,
+                          void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&))
+    : _data(data...), _max_size(max_size), _emit(emit) {} // private: instances are created through C2CodeStub::make()
+
+  friend C2CodeStub; // lets C2CodeStub::make() reach the private constructor
+public:
+  template <std::size_t I>
+  constexpr const auto& data() const { return _data.template get<I>(); } // I-th saved value, by const reference
+
+  int max_size() const { return _max_size; } // implements C2CodeStub::max_size()
+  void emit(C2_MacroAssembler& masm) { _emit(masm, *this); } // implements C2CodeStub::emit() by forwarding to the stored callback
+};
+
+template <class... Ts>
+C2GeneralStub<Ts...>* C2CodeStub::make(const Ts&... data, int max_size,
+                                       void (*emit)(C2_MacroAssembler&, C2GeneralStub<Ts...>&)) { // factory: builds a C2GeneralStub holding copies of data and registers it
+  auto stub = new (Compile::current()->comp_arena()) C2GeneralStub<Ts...>(data..., max_size, emit); // placed in the current compilation's comp_arena()
+  stub->add_to_stub_list(); // registration is skipped by add_to_stub_list() during scratch emission
+  return stub;
+}
+
 #endif // SHARE_OPTO_C2_CODESTUBS_HPP
--- a/src/hotspot/share/utilities/tuple.hpp
+++ b/src/hotspot/share/utilities/tuple.hpp
@ -0,0 +1,57 @@
+/*
+* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#ifndef SHARE_UTILITIES_TUPLE_HPP
+#define SHARE_UTILITIES_TUPLE_HPP
+
+#include <type_traits>
+
+template <class... Ts>
+class Tuple; // primary template is only declared; the two specializations below provide the definitions
+
+template <>
+class Tuple<> {}; // empty tuple: ends the recursion of Tuple<T, Ts...>
+
+template <class T, class... Ts>
+class Tuple<T, Ts...> {
+private:
+  T _first; // element at index 0
+  Tuple<Ts...> _remaining; // elements at index 1 and above
+
+public:
+  constexpr Tuple(const T& first, const Ts&... remaining) noexcept
+    : _first(first), _remaining(remaining...) {} // copies every argument into the tuple
+
+  template <std::size_t I, std::enable_if_t<(I > 0), int> = 0>
+  constexpr const auto& get() const noexcept { // overload selected for I > 0
+    return _remaining.template get<I - 1>(); // delegate to the tail with the index shifted down by one
+  }; // NOTE(review): the ';' after this brace is redundant
+
+  template <std::size_t I, std::enable_if_t<I == 0, int> = 0>
+  constexpr const T& get() const noexcept { // overload selected for I == 0
+    return _first;
+  }
+};
+
+#endif // SHARE_UTILITIES_TUPLE_HPP
--- a/test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/x86/ConvertF2I.java
@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler.x86;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 1)
+@Fork(value = 1, jvmArgsAppend = {"-XX:-UseSuperWord"}) // NOTE(review): presumably disables auto-vectorization so the array loops stay scalar - confirm
+public class ConvertF2I { // JMH benchmark for float/double -> int/long casts, single values and array loops
+    static final int LENGTH = 1000; // element count of every array below
+    static final int[] INT_ARRAY = new int[LENGTH]; // destination for the *2iArray benchmarks
+    static final long[] LONG_ARRAY = new long[LENGTH]; // destination for the *2lArray benchmarks
+    static final float[] FLOAT_ARRAY = new float[LENGTH]; // source for f2*Array; never written in this class, so it holds default zeros
+    static final double[] DOUBLE_ARRAY = new double[LENGTH]; // source for d2*Array; never written in this class, so it holds default zeros
+    float f; // input for the f2*Single benchmarks; left at its default value
+    double d; // input for the d2*Single benchmarks; left at its default value
+
+    @Benchmark
+    public int f2iSingle() {
+        return (int)f; // result is returned so JMH consumes it
+    }
+
+    @Benchmark
+    public long f2lSingle() {
+        return (long)f;
+    }
+
+    @Benchmark
+    public int d2iSingle() {
+        return (int)d;
+    }
+
+    @Benchmark
+    public long d2lSingle() {
+        return (long)d;
+    }
+
+    @Benchmark
+    public void f2iArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            INT_ARRAY[i] = (int)FLOAT_ARRAY[i]; // element-wise float -> int
+        }
+    }
+
+    @Benchmark
+    public void f2lArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            LONG_ARRAY[i] = (long)FLOAT_ARRAY[i]; // element-wise float -> long
+        }
+    }
+
+    @Benchmark
+    public void d2iArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            INT_ARRAY[i] = (int)DOUBLE_ARRAY[i]; // element-wise double -> int
+        }
+    }
+
+    @Benchmark
+    public void d2lArray() {
+        for (int i = 0; i < LENGTH; i++) {
+            LONG_ARRAY[i] = (long)DOUBLE_ARRAY[i]; // element-wise double -> long
+        }
+    }
+}