From fe57da55269fe0dcd0ce99f23b531d1fa50ea3ab Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Tue, 10 Mar 2026 18:07:58 +1100 Subject: [PATCH] 8379327: 128-bit multiplication uses two multiply instructions on x86_64 --- src/hotspot/cpu/x86/x86.ad | 52 +++++++ src/hotspot/share/opto/classes.hpp | 2 + src/hotspot/share/opto/compile.cpp | 43 ++++++ src/hotspot/share/opto/compile.hpp | 1 + src/hotspot/share/opto/mulnode.cpp | 32 ++++ src/hotspot/share/opto/mulnode.hpp | 48 ++++++ .../c2/TestMultiplyHighLowFusion.java | 127 ++++++++++++++++ .../c2/TestMultiplyHighLowFusionX64.java | 140 ++++++++++++++++++ 8 files changed, 445 insertions(+) create mode 100644 test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusion.java create mode 100644 test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusionX64.java diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index ed380105565..2a08534a023 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -11647,6 +11647,58 @@ instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) ins_pipe(ialu_reg_mem_alu0); %} +instruct mulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr) +%{ + match(MulHiLoL src rax); + effect(KILL cr); + + ins_cost(300); + format %{ "imulq RDX:RAX, RAX, $src\t# mulhilo" %} + ins_encode %{ + __ imulq($src$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct mulHiLoL_rReg_swap(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr) +%{ + match(MulHiLoL rax src); + effect(KILL cr); + + ins_cost(300); + format %{ "imulq RDX:RAX, RAX, $src\t# mulhilo" %} + ins_encode %{ + __ imulq($src$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct umulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr) +%{ + match(UMulHiLoL src rax); + effect(KILL cr); + + ins_cost(300); + format %{ "mulq RDX:RAX, RAX, $src\t# umulhilo" %} + ins_encode %{ + __ mulq($src$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct 
umulHiLoL_rReg_swap(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr) +%{ + match(UMulHiLoL rax src); + effect(KILL cr); + + ins_cost(300); + format %{ "mulq RDX:RAX, RAX, $src\t# umulhilo" %} + ins_encode %{ + __ mulq($src$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{ match(Set dst (MulHiL src rax)); diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index abd93fdd876..921afdccaae 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -266,6 +266,8 @@ macro(MulD) macro(MulF) macro(MulHiL) macro(UMulHiL) +macro(MulHiLoL) +macro(UMulHiLoL) macro(MulI) macro(MulL) macro(Multi) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index f1ea8231df9..fbcacfe8819 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -3259,6 +3259,41 @@ void Compile::handle_div_mod_op(Node* n, BasicType bt, bool is_unsigned) { } } +void Compile::handle_mulhi_mul_op(Node* n, bool is_unsigned) { + const int fused_opcode = is_unsigned ? Op_UMulHiLoL : Op_MulHiLoL; + if (!Matcher::has_match_rule(fused_opcode)) { + return; + } + + Node* mul = n->find_similar(Op_MulL); + if (mul == nullptr) { + Node* lhs = n->in(1); + Node* rhs = n->in(2); + if (rhs != nullptr && rhs->outcnt() >= 2) { + for (DUIterator_Fast dmax, i = rhs->fast_outs(dmax); i < dmax; i++) { + Node* use = rhs->fast_out(i); + if (use != n && + use->Opcode() == Op_MulL && + use->req() == n->req() && + use->in(1) == rhs && + use->in(2) == lhs) { + mul = use; + break; + } + } + } + } + + if (mul == nullptr) { + return; + } + + MulHiLoLNode* mul_hi_lo = is_unsigned ? 
static_cast<MulHiLoLNode*>(UMulHiLoLNode::make(n)) + : MulHiLoLNode::make(n); + mul->subsume_by(mul_hi_lo->lo_proj(), this); + n->subsume_by(mul_hi_lo->hi_proj(), this); +} + void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& frc, uint nop, Unique_Node_List& dead_nodes) { switch( nop ) { // Count all float operations that may use FPU @@ -3730,6 +3765,14 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f handle_div_mod_op(n, T_LONG, true); break; + case Op_MulHiL: + handle_mulhi_mul_op(n, false); + break; + + case Op_UMulHiL: + handle_mulhi_mul_op(n, true); + break; + case Op_LoadVector: case Op_StoreVector: #ifdef ASSERT diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index eb6be669f24..4a1820e75d5 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -1241,6 +1241,7 @@ public: void final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& frc, uint nop, Unique_Node_List& dead_nodes); void final_graph_reshaping_walk(Node_Stack& nstack, Node* root, Final_Reshape_Counts& frc, Unique_Node_List& dead_nodes); void handle_div_mod_op(Node* n, BasicType bt, bool is_unsigned); + void handle_mulhi_mul_op(Node* n, bool is_unsigned); // Logic cone optimization. 
void optimize_logic_cones(PhaseIterGVN &igvn); diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp index cac9f1dcc37..e56633d5bfd 100644 --- a/src/hotspot/share/opto/mulnode.cpp +++ b/src/hotspot/share/opto/mulnode.cpp @@ -23,6 +23,8 @@ */ #include "memory/allocation.inline.hpp" +#include "opto/machnode.hpp" +#include "opto/matcher.hpp" #include "opto/addnode.hpp" #include "opto/connode.hpp" #include "opto/convertnode.hpp" @@ -606,6 +608,36 @@ const Type* UMulHiLNode::Value(PhaseGVN* phase) const { return MulHiValue(t1, t2, bot); } +MulHiLoLNode* MulHiLoLNode::make(Node* mul_hi) { + assert(mul_hi->Opcode() == Op_MulHiL, "expected MulHiL"); + + MulHiLoLNode* mul_hi_lo = new MulHiLoLNode(mul_hi->in(1), mul_hi->in(2)); + [[maybe_unused]] Node* lo_proj = new ProjNode(mul_hi_lo, MulHiLoLNode::lo_proj_num); + [[maybe_unused]] Node* hi_proj = new ProjNode(mul_hi_lo, MulHiLoLNode::hi_proj_num); + return mul_hi_lo; +} + +UMulHiLoLNode* UMulHiLoLNode::make(Node* umul_hi) { + assert(umul_hi->Opcode() == Op_UMulHiL, "expected UMulHiL"); + + UMulHiLoLNode* umul_hi_lo = new UMulHiLoLNode(umul_hi->in(1), umul_hi->in(2)); + [[maybe_unused]] Node* lo_proj = new ProjNode(umul_hi_lo, MulHiLoLNode::lo_proj_num); + [[maybe_unused]] Node* hi_proj = new ProjNode(umul_hi_lo, MulHiLoLNode::hi_proj_num); + return umul_hi_lo; +} + +Node* MulHiLoLNode::match(const ProjNode* proj, const Matcher* match) { + uint ideal_reg = proj->ideal_reg(); + RegMask rm; + if (proj->_con == lo_proj_num) { + rm.assignFrom(match->divL_proj_mask()); + } else { + assert(proj->_con == hi_proj_num, "must be lo or hi projection"); + rm.assignFrom(match->modL_proj_mask()); + } + return new MachProjNode(this, proj->_con, rm, ideal_reg); +} + // A common routine used by UMulHiLNode and MulHiLNode const Type* MulHiValue(const Type *t1, const Type *t2, const Type *bot) { // Either input is TOP ==> the result is TOP diff --git a/src/hotspot/share/opto/mulnode.hpp 
b/src/hotspot/share/opto/mulnode.hpp index 1e19e8ec5cd..f35e90ad484 100644 --- a/src/hotspot/share/opto/mulnode.hpp +++ b/src/hotspot/share/opto/mulnode.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_OPTO_MULNODE_HPP #define SHARE_OPTO_MULNODE_HPP +#include "opto/multnode.hpp" #include "opto/node.hpp" #include "opto/opcodes.hpp" #include "opto/type.hpp" @@ -32,6 +33,7 @@ // Portions of code courtesy of Clifford Click class PhaseTransform; +class Matcher; //------------------------------MulNode---------------------------------------- // Classic MULTIPLY functionality. This covers all the usual 'multiply' @@ -205,6 +207,52 @@ public: friend const Type* MulHiValue(const Type *t1, const Type *t2, const Type *bot); }; +//------------------------------MulHiLoLNode----------------------------------- +// Lower and upper 64-bit results of a signed 64x64->128 multiply. +class MulHiLoLNode : public MultiNode { +protected: + MulHiLoLNode(Node* in1, Node* in2) : MultiNode(3) { + init_req(0, nullptr); + init_req(1, in1); + init_req(2, in2); + } + +public: + enum { + lo_proj_num = 0, + hi_proj_num = 1 + }; + + virtual int Opcode() const; + virtual Node* Identity(PhaseGVN* phase) { return this; } + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape) { return nullptr; } + virtual const Type* Value(PhaseGVN* phase) const { return bottom_type(); } + virtual uint hash() const { return Node::hash(); } + virtual bool is_CFG() const { return false; } + virtual uint ideal_reg() const { return NotAMachineReg; } + virtual const Type* bottom_type() const { return TypeTuple::LONG_PAIR; } + + virtual Node* match(const ProjNode* proj, const Matcher* m); + + static MulHiLoLNode* make(Node* mul_hi); + + ProjNode* lo_proj() { return proj_out_or_null(lo_proj_num); } + ProjNode* hi_proj() { return proj_out_or_null(hi_proj_num); } + +private: + virtual bool depends_only_on_test() const { return false; } +}; + +//------------------------------UMulHiLoLNode---------------------------------- +// Lower and upper 
64-bit results of an unsigned 64x64->128 multiply. +class UMulHiLoLNode : public MulHiLoLNode { +public: + UMulHiLoLNode(Node* in1, Node* in2) : MulHiLoLNode(in1, in2) {} + virtual int Opcode() const; + + static UMulHiLoLNode* make(Node* umul_hi); +}; + //------------------------------AndINode--------------------------------------- // Logically AND 2 integers. Included with the MUL nodes because it inherits // all the behavior of multiplication on a ring. diff --git a/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusion.java b/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusion.java new file mode 100644 index 00000000000..8d6949e9808 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusion.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8379327 + * @summary Verify correctness for combined low/high 64-bit multiplication patterns. 
+ * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileThreshold=100 + * -XX:CompileCommand=compileonly,compiler.c2.TestMultiplyHighLowFusion::doMath + * -XX:CompileCommand=compileonly,compiler.c2.TestMultiplyHighLowFusion::doMathSwapped + * -XX:CompileCommand=compileonly,compiler.c2.TestMultiplyHighLowFusion::doUnsignedMath + * -XX:CompileCommand=compileonly,compiler.c2.TestMultiplyHighLowFusion::doUnsignedMathSwapped + * compiler.c2.TestMultiplyHighLowFusion + */ + +package compiler.c2; + +import java.math.BigInteger; +import java.util.Random; + +public class TestMultiplyHighLowFusion { + private static final BigInteger MASK_64 = BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE); + private static final long[] CORNER_CASES = { + 0L, + 1L, + -1L, + Long.MIN_VALUE, + Long.MAX_VALUE, + 0x00000000FFFFFFFFL, + 0xFFFFFFFF00000000L, + 0x123456789ABCDEFL, + 0xFEDCBA9876543210L + }; + + private static long doMath(long a, long b) { + long low = a * b; + long high = Math.multiplyHigh(a, b); + return low + high; + } + + private static long doMathSwapped(long a, long b) { + long low = b * a; + long high = Math.multiplyHigh(b, a); + return low + high; + } + + private static long doUnsignedMath(long a, long b) { + long low = a * b; + long high = Math.unsignedMultiplyHigh(a, b); + return low + high; + } + + private static long doUnsignedMathSwapped(long a, long b) { + long low = b * a; + long high = Math.unsignedMultiplyHigh(b, a); + return low + high; + } + + private static long expectedSigned(long a, long b) { + BigInteger product = BigInteger.valueOf(a).multiply(BigInteger.valueOf(b)); + long low = product.longValue(); + long high = product.shiftRight(64).longValue(); + return low + high; + } + + private static long expectedUnsigned(long a, long b) { + BigInteger ua = BigInteger.valueOf(a).and(MASK_64); + BigInteger ub = BigInteger.valueOf(b).and(MASK_64); + BigInteger product = ua.multiply(ub); + long low = product.longValue(); + long high = 
product.shiftRight(64).longValue(); + return low + high; + } + + private static void verifyPair(long a, long b) { + long expectedSigned = expectedSigned(a, b); + long expectedUnsigned = expectedUnsigned(a, b); + + long signed = doMath(a, b); + long signedSwapped = doMathSwapped(a, b); + long unsigned = doUnsignedMath(a, b); + long unsignedSwapped = doUnsignedMathSwapped(a, b); + + if (signed != expectedSigned || signedSwapped != expectedSigned) { + throw new RuntimeException("Signed mismatch for a=" + a + ", b=" + b + + ": got=" + signed + ", gotSwapped=" + signedSwapped + ", expected=" + expectedSigned); + } + if (unsigned != expectedUnsigned || unsignedSwapped != expectedUnsigned) { + throw new RuntimeException("Unsigned mismatch for a=" + a + ", b=" + b + + ": got=" + unsigned + ", gotSwapped=" + unsignedSwapped + ", expected=" + expectedUnsigned); + } + } + + public static void main(String[] args) { + Random random = new Random(0x8379327L); + + for (long a : CORNER_CASES) { + for (long b : CORNER_CASES) { + verifyPair(a, b); + } + } + + for (int i = 0; i < 200_000; i++) { + verifyPair(random.nextLong(), random.nextLong()); + } + } +} diff --git a/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusionX64.java b/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusionX64.java new file mode 100644 index 00000000000..6d848fbc120 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestMultiplyHighLowFusionX64.java @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8379327 + * @summary Verify x64 backend emits one multiply for low+high 64-bit multiply pattern. + * + * @requires vm.compiler2.enabled + * @requires os.arch=="amd64" | os.arch=="x86_64" + * + * @library /test/lib + * @run driver compiler.c2.TestMultiplyHighLowFusionX64 + */ + +package compiler.c2; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.process.ProcessTools; + +public class TestMultiplyHighLowFusionX64 { + private static final Pattern MULHILO = Pattern.compile("#\\s*mulhilo\\b"); + private static final Pattern UMULHILO = Pattern.compile("#\\s*umulhilo\\b"); + + public static void main(String[] args) throws Exception { + verifyMethod("doMath", true); + verifyMethod("doMathSwapped", true); + verifyMethod("doUnsignedMath", false); + verifyMethod("doUnsignedMathSwapped", false); + } + + private static void verifyMethod(String methodName, boolean signed) throws Exception { + List<String> command = new ArrayList<>(); + command.add("-XX:+UnlockDiagnosticVMOptions"); + command.add("-XX:-TieredCompilation"); + command.add("-Xbatch"); + command.add("-XX:CompileCommand=compileonly," + Launcher.class.getName() + "::" + methodName); + command.add("-XX:CompileCommand=print," + Launcher.class.getName() + "::" + methodName); + command.add(Launcher.class.getName()); + 
command.add(methodName); + + OutputAnalyzer output = ProcessTools.executeTestJava(command); + output.shouldHaveExitValue(0); + + int mulCount = countMul(output.getOutput(), signed); + if (mulCount != 1) { + throw new RuntimeException("Expected exactly one multiply in " + methodName + ", found " + mulCount + + "\nFull output:\n" + output.getOutput()); + } + } + + private static int countMul(String output, boolean signed) { + String lower = output.toLowerCase(Locale.ROOT); + Pattern pattern = signed ? MULHILO : UMULHILO; + int count = 0; + Matcher matcher = pattern.matcher(lower); + while (matcher.find()) { + count++; + } + return count; + } + + static class Launcher { + static long doMath(long a, long b) { + long low = a * b; + long high = Math.multiplyHigh(a, b); + return low + high; + } + + static long doMathSwapped(long a, long b) { + long low = b * a; + long high = Math.multiplyHigh(b, a); + return low + high; + } + + static long doUnsignedMath(long a, long b) { + long low = a * b; + long high = Math.unsignedMultiplyHigh(a, b); + return low + high; + } + + static long doUnsignedMathSwapped(long a, long b) { + long low = b * a; + long high = Math.unsignedMultiplyHigh(b, a); + return low + high; + } + + public static void main(String[] args) { + String mode = args[0]; + long acc = 0; + long b = 987654321L; + for (int i = 0; i < 200_000; i++) { + switch (mode) { + case "doMath": + acc += doMath(i, b); + break; + case "doMathSwapped": + acc += doMathSwapped(i, b); + break; + case "doUnsignedMath": + acc += doUnsignedMath(i, b); + break; + case "doUnsignedMathSwapped": + acc += doUnsignedMathSwapped(i, b); + break; + default: + throw new RuntimeException("Unknown mode: " + mode); + } + } + if (acc == 42) { + System.out.println("Impossible"); + } + } + } +}