8378713: C2: performance regression due to missing constant folding for Math.pow()

Reviewed-by: roland, mchevalier
This commit is contained in:
Kangcheng Xu 2026-03-24 16:16:51 +00:00 committed by Roland Westrelin
parent d0d85cd6b5
commit 9658c19afd
7 changed files with 431 additions and 65 deletions

View File

@ -43,6 +43,7 @@
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/powerOfTwo.hpp"
// Portions of code courtesy of Clifford Click
@ -1371,6 +1372,25 @@ TupleNode* CallLeafPureNode::make_tuple_of_input_state_and_top_return_values(con
return tuple;
}
// Builds a fresh CallLeafPureNode with the same signature, entry point and name as this node.
// - control: control input for the new call; when nullptr, this node's own control input is used.
// I_O, Memory, ReturnAdr and FramePtr of the new call are all set to top; every argument
// slot is copied over from this node unchanged.
// The returned node is only allocated and wired up here, nothing else is done with it.
CallLeafPureNode* CallLeafPureNode::inline_call_leaf_pure_node(Node* control) const {
  Node* const ctrl = (control != nullptr) ? control : in(TypeFunc::Control);
  Node* const top_node = Compile::current()->top();

  CallLeafPureNode* copy = new CallLeafPureNode(tf(), entry_point(), _name);
  copy->init_req(TypeFunc::Control, ctrl);
  copy->init_req(TypeFunc::I_O, top_node);
  copy->init_req(TypeFunc::Memory, top_node);
  copy->init_req(TypeFunc::ReturnAdr, top_node);
  copy->init_req(TypeFunc::FramePtr, top_node);

  // Forward all argument slots (everything from TypeFunc::Parms up to the end of the domain).
  const unsigned int slot_limit = tf()->domain()->cnt();
  for (unsigned int slot = TypeFunc::Parms; slot < slot_limit; slot++) {
    copy->init_req(slot, in(slot));
  }
  return copy;
}
Node* CallLeafPureNode::Ideal(PhaseGVN* phase, bool can_reshape) {
if (is_dead()) {
return nullptr;
@ -2437,3 +2457,157 @@ bool CallNode::may_modify_arraycopy_helper(const TypeOopPtr* dest_t, const TypeO
return true;
}
// Creates the pow(base, exp) call node. The call targets the dpow stub when one is
// available and falls back to SharedRuntime::dpow otherwise. The node is flagged as a
// macro node and added to the compilation's macro node list.
PowDNode::PowDNode(Compile* C, Node* base, Node* exp)
    : CallLeafPureNode(
        OptoRuntime::Math_DD_D_Type(),
        StubRoutines::dpow() == nullptr ? CAST_FROM_FN_PTR(address, SharedRuntime::dpow) : StubRoutines::dpow(),
        "pow") {
  // Each double argument occupies two input slots; the second slot is padding.
  Node* const padding = C->top();
  init_req(TypeFunc::Parms + 0, base);
  init_req(TypeFunc::Parms + 1, padding);
  init_req(TypeFunc::Parms + 2, exp);
  init_req(TypeFunc::Parms + 3, padding);

  add_flag(Flag_is_macro);
  C->add_macro_node(this);
}
// Computes the type of this call.
// - TOP if either input is TOP.
// - A tuple carrying a double constant when the result is known at compile time:
//     pow(x, NaN)    => NaN
//     pow(x, +/-0.0) => 1.0
//     pow(c1, c2)    => SharedRuntime::dpow(c1, c2) for two constant inputs
// - The declared return range of the call otherwise.
const Type* PowDNode::Value(PhaseGVN* phase) const {
  const Type* base_type = phase->type(base());
  const Type* exp_type = phase->type(exp());
  if (base_type == Type::TOP || exp_type == Type::TOP) {
    return Type::TOP;
  }

  const TypeD* base_k = base_type->isa_double_constant();
  const TypeD* exp_k = exp_type->isa_double_constant();

  // Every foldable case needs a constant exponent. The exponent-only rules take
  // precedence over plain constant folding of both inputs.
  const TypeD* folded = nullptr;
  if (exp_k != nullptr) {
    const double e = exp_k->getd();
    if (g_isnan(e)) {
      // If the second argument is NaN, then the result is NaN.
      folded = TypeD::make(NAN);
    } else if (e == 0.0) { // holds for both -0.0 and +0.0
      // If the second argument is positive or negative zero, then the result is 1.0.
      folded = TypeD::ONE;
    } else if (base_k != nullptr) {
      // Both inputs are constants: evaluate at compile time.
      folded = TypeD::make(SharedRuntime::dpow(base_k->getd(), e));
    }
  }

  if (folded == nullptr) {
    return tf()->range();
  }

  // A bare TypeD cannot be returned: the type must be a tuple to be compatible with call nodes.
  const Type** fields = TypeTuple::fields(2);
  fields[TypeFunc::Parms + 0] = folded;
  fields[TypeFunc::Parms + 1] = Type::HALF;
  return TypeTuple::make(TypeFunc::Parms + 2, fields);
}
// Strength-reduces pow(base, exp) for a few constant exponents. Only runs when can_reshape is true.
//   pow(x, 1.0) => x
//   pow(x, 2.0) => x * x
//   pow(x, 0.5) => x <= 0 ? pow(x, 0.5) : sqrt(x)   (only if the matcher supports SqrtD)
// In each of these cases the replacement is returned wrapped in a tuple built by
// make_tuple_of_input_state_and_result(). All other cases are handed to CallLeafPureNode::Ideal().
Node* PowDNode::Ideal(PhaseGVN* phase, bool can_reshape) {
if (!can_reshape) {
return nullptr; // wait for igvn
}
PhaseIterGVN* igvn = phase->is_IterGVN();
Node* base = this->base();
Node* exp = this->exp();
const Type* t_exp = phase->type(exp);
const TypeD* exp_con = t_exp->isa_double_constant();
// Special cases when only the exponent is known:
if (exp_con != nullptr) {
double e = exp_con->getd();
// If the second argument is 1.0, then the result is the same as the first argument.
// i.e., pow(x, 1.0) => x
if (e == 1.0) {
return make_tuple_of_input_state_and_result(igvn, base);
}
// If the second argument is 2.0, then strength reduce to multiplications.
// i.e., pow(x, 2.0) => x * x
if (e == 2.0) {
Node* mul = igvn->transform(new MulDNode(base, base));
return make_tuple_of_input_state_and_result(igvn, mul);
}
// If the second argument is 0.5, then strength reduce to a square root.
// i.e., pow(x, 0.5) => sqrt(x) iff x > 0
if (e == 0.5 && Matcher::match_rule_supported(Op_SqrtD)) {
Node* ctrl = in(TypeFunc::Control);
Node* zero = igvn->zerocon(T_DOUBLE);
// According to the API specs, pow(-0.0, 0.5) = 0.0 and sqrt(-0.0) = -0.0.
// So pow(-0.0, 0.5) shouldn't be replaced with sqrt(-0.0).
// -0.0/+0.0 are both excluded since floating-point comparison doesn't distinguish -0.0 from +0.0.
Node* cmp = igvn->register_new_node_with_optimizer(new CmpDNode(base, zero));
Node* test = igvn->register_new_node_with_optimizer(new BoolNode(cmp, BoolTest::le));
// The true projection (base <= 0) is the slow path and is marked as unlikely.
IfNode* iff = new IfNode(ctrl, test, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
igvn->register_new_node_with_optimizer(iff);
Node* if_slow = igvn->register_new_node_with_optimizer(new IfTrueNode(iff)); // x <= 0
Node* if_fast = igvn->register_new_node_with_optimizer(new IfFalseNode(iff)); // x > 0
// slow path: call pow(x, 0.5)
// The call is a new CallLeafPureNode with this node's arguments, controlled by the slow projection.
Node* call = igvn->register_new_node_with_optimizer(inline_call_leaf_pure_node(if_slow));
Node* call_ctrl = igvn->register_new_node_with_optimizer(new ProjNode(call, TypeFunc::Control));
Node* call_result = igvn->register_new_node_with_optimizer(new ProjNode(call, TypeFunc::Parms + 0));
// fast path: sqrt(x)
Node* sqrt = igvn->register_new_node_with_optimizer(new SqrtDNode(igvn->C, if_fast, base));
// merge paths
// Region and phi use matching input slots: 1 for the slow path, 2 for the fast path.
RegionNode* region = new RegionNode(3);
igvn->register_new_node_with_optimizer(region);
region->init_req(1, call_ctrl); // slow path
region->init_req(2, if_fast); // fast path
PhiNode* phi = new PhiNode(region, Type::DOUBLE);
igvn->register_new_node_with_optimizer(phi);
phi->init_req(1, call_result); // slow: pow() result
phi->init_req(2, sqrt); // fast: sqrt() result
igvn->C->set_has_split_ifs(true); // Has chance for split-if optimization
// The merged region becomes the control of the returned tuple, the phi its result.
return make_tuple_of_input_state_and_result(igvn, phi, region);
}
}
// No special case applies: defer to the generic pure leaf call idealization.
return CallLeafPureNode::Ideal(phase, can_reshape);
}
// Ideal() cannot hand back a Con or MulNode directly because the users still expect a Call node.
// Instead, produce a tuple with the same layout so that users can keep projecting control, io,
// memory, etc. out of it.
// - result:  node placed in the first return value slot; the following slot is set to top.
// - control: control for the tuple; when nullptr, this node's own control input is used.
// I_O, Memory, FramePtr and ReturnAdr are taken from this node's inputs.
// Side effect: this node is removed from the compilation's list of macro nodes.
TupleNode* PowDNode::make_tuple_of_input_state_and_result(PhaseIterGVN* phase, Node* result, Node* control) {
  Node* const ctrl = (control != nullptr) ? control : in(TypeFunc::Control);

  Compile* const compile = phase->C;
  compile->remove_macro_node(this);

  return TupleNode::make(
      tf()->range(),
      ctrl,
      in(TypeFunc::I_O),
      in(TypeFunc::Memory),
      in(TypeFunc::FramePtr),
      in(TypeFunc::ReturnAdr),
      result,
      compile->top());
}

View File

@ -948,6 +948,8 @@ public:
}
int Opcode() const override;
Node* Ideal(PhaseGVN* phase, bool can_reshape) override;
CallLeafPureNode* inline_call_leaf_pure_node(Node* control = nullptr) const;
};
//------------------------------CallLeafNoFPNode-------------------------------
@ -1299,4 +1301,19 @@ public:
JVMState* dbg_jvms() const { return nullptr; }
#endif
};
//------------------------------PowDNode--------------------------------------
// Pure leaf call node for pow(base, exp) on doubles.
class PowDNode : public CallLeafPureNode {
public:
  PowDNode(Compile* C, Node* base, Node* exp);

  // Argument accessors. The inputs at Parms + 1 and Parms + 3 are not exposed here.
  Node* base() const { return in(TypeFunc::Parms + 0); }
  Node* exp() const { return in(TypeFunc::Parms + 2); }

  int Opcode() const override;
  const Type* Value(PhaseGVN* phase) const override;
  Node* Ideal(PhaseGVN* phase, bool can_reshape) override;

private:
  // Wraps 'result' (and optionally a replacement 'control') into a TupleNode.
  TupleNode* make_tuple_of_input_state_and_result(PhaseIterGVN* phase, Node* result, Node* control = nullptr);
};
#endif // SHARE_OPTO_CALLNODE_HPP

View File

@ -286,6 +286,7 @@ macro(OpaqueZeroTripGuard)
macro(OpaqueConstantBool)
macro(OpaqueInitializedAssertionPredicate)
macro(OpaqueTemplateAssertionPredicate)
macro(PowD)
macro(ProfileBoolean)
macro(OrI)
macro(OrL)

View File

@ -1819,61 +1819,17 @@ bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, c
//------------------------------inline_math_pow-----------------------------
// Intrinsic for pow(base, exp): reads the base from argument(0) and the exponent from argument(2).
// NOTE(review): as shown here, this body looks like a diff hunk whose +/- markers were lost, i.e.
// removed and added lines appear interleaved. Read literally, everything after the
// 'return StubRoutines::dpow() != nullptr ? ...' statement below is unreachable, and the inner
// 'Node* base' declarations shadow the outer one. Confirm against the real file before relying
// on the statement order.
bool LibraryCallKit::inline_math_pow() {
Node* base = argument(0);
Node* exp = argument(2);
const TypeD* d = _gvn.type(exp)->isa_double_constant();
if (d != nullptr) {
if (d->getd() == 2.0) {
// Special case: pow(x, 2.0) => x * x
Node* base = argument(0);
set_result(_gvn.transform(new MulDNode(base, base)));
return true;
} else if (d->getd() == 0.5 && Matcher::match_rule_supported(Op_SqrtD)) {
// Special case: pow(x, 0.5) => sqrt(x)
Node* base = argument(0);
Node* zero = _gvn.zerocon(T_DOUBLE);
RegionNode* region = new RegionNode(3);
Node* phi = new PhiNode(region, Type::DOUBLE);
Node* cmp = _gvn.transform(new CmpDNode(base, zero));
// According to the API specs, pow(-0.0, 0.5) = 0.0 and sqrt(-0.0) = -0.0.
// So pow(-0.0, 0.5) shouldn't be replaced with sqrt(-0.0).
// -0.0/+0.0 are both excluded since floating-point comparison doesn't distinguish -0.0 from +0.0.
Node* test = _gvn.transform(new BoolNode(cmp, BoolTest::le));
Node* if_pow = generate_slow_guard(test, nullptr);
Node* value_sqrt = _gvn.transform(new SqrtDNode(C, control(), base));
phi->init_req(1, value_sqrt);
region->init_req(1, control());
if (if_pow != nullptr) {
set_control(if_pow);
address target = StubRoutines::dpow() != nullptr ? StubRoutines::dpow() :
CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
const TypePtr* no_memory_effects = nullptr;
Node* trig = make_runtime_call(RC_LEAF, OptoRuntime::Math_DD_D_Type(), target, "POW",
no_memory_effects, base, top(), exp, top());
Node* value_pow = _gvn.transform(new ProjNode(trig, TypeFunc::Parms+0));
#ifdef ASSERT
Node* value_top = _gvn.transform(new ProjNode(trig, TypeFunc::Parms+1));
assert(value_top == top(), "second value must be top");
#endif
phi->init_req(2, value_pow);
region->init_req(2, _gvn.transform(new ProjNode(trig, TypeFunc::Control)));
}
C->set_has_split_ifs(true); // Has chance for split-if optimization
set_control(_gvn.transform(region));
record_for_igvn(region);
set_result(_gvn.transform(phi));
return true;
}
}
return StubRoutines::dpow() != nullptr ?
runtime_math(OptoRuntime::Math_DD_D_Type(), StubRoutines::dpow(), "dpow") :
runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
// Build a PowDNode for the call, wire up its predefined inputs/outputs, and use its first
// return value projection as the intrinsic's result. The call is also recorded for IGVN.
CallNode* pow = new PowDNode(C, base, exp);
set_predefined_input_for_runtime_call(pow);
pow = _gvn.transform(pow)->as_CallLeafPure();
set_predefined_output_for_runtime_call(pow);
Node* result = _gvn.transform(new ProjNode(pow, TypeFunc::Parms + 0));
record_for_igvn(pow);
set_result(result);
return true;
}
//------------------------------inline_math_native-----------------------------

View File

@ -2500,6 +2500,7 @@ void PhaseMacroExpand::eliminate_macro_nodes() {
assert(n->Opcode() == Op_LoopLimit ||
n->Opcode() == Op_ModD ||
n->Opcode() == Op_ModF ||
n->Opcode() == Op_PowD ||
n->is_OpaqueConstantBool() ||
n->is_OpaqueInitializedAssertionPredicate() ||
n->Opcode() == Op_MaxL ||
@ -2656,18 +2657,11 @@ bool PhaseMacroExpand::expand_macro_nodes() {
default:
switch (n->Opcode()) {
case Op_ModD:
case Op_ModF: {
CallNode* mod_macro = n->as_Call();
CallNode* call = new CallLeafPureNode(mod_macro->tf(), mod_macro->entry_point(), mod_macro->_name);
call->init_req(TypeFunc::Control, mod_macro->in(TypeFunc::Control));
call->init_req(TypeFunc::I_O, C->top());
call->init_req(TypeFunc::Memory, C->top());
call->init_req(TypeFunc::ReturnAdr, C->top());
call->init_req(TypeFunc::FramePtr, C->top());
for (unsigned int i = 0; i < mod_macro->tf()->domain()->cnt() - TypeFunc::Parms; i++) {
call->init_req(TypeFunc::Parms + i, mod_macro->in(TypeFunc::Parms + i));
}
_igvn.replace_node(mod_macro, call);
case Op_ModF:
case Op_PowD: {
CallLeafPureNode* call_macro = n->as_CallLeafPure();
CallLeafPureNode* call = call_macro->inline_call_leaf_pure_node();
_igvn.replace_node(call_macro, call);
transform_later(call);
break;
}

View File

@ -0,0 +1,218 @@
/*
* Copyright (c) 2026, IBM and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package compiler.intrinsics.math;
import jdk.test.lib.Asserts;
import compiler.lib.ir_framework.*;
import compiler.lib.generators.*;
import static compiler.lib.generators.Generators.*;
import java.util.Random;
/*
* @test
* @bug 8378713
* @key randomness
* @summary Math.pow(base, exp) should constant propagate
* @library /test/lib /
* @run driver ${test.main.class}
*/
public class PowDNodeTests {
    public static final Generator<Double> UNIFORMS = G.uniformDoubles(); // [0, 1)

    // Random but fixed-per-run constants used by the folding tests.
    public static final double B = UNIFORMS.next() * 1000.0d;
    public static final double E = UNIFORMS.next() * 1000.0d + 3.0d; // e >= 3 to avoid strength reduction code

    public static void main(String[] args) {
        TestFramework.run();

        testCorrectness();
    }

    // Test 1: pow(2.0, 10.0) -> 1024.0
    @Test
    @IR(failOn = {IRNode.POW_D})
    public static double constantLiteralFolding() {
        return Math.pow(2.0, 10.0); // should fold to 1024.0
    }

    // Test 2: pow(final B, final E) -> B^E
    @Test
    @IR(failOn = {IRNode.POW_D})
    public static double constantStaticFolding() {
        return Math.pow(B, E); // should fold to B^E
    }

    // Test 3: pow(b, 0.0) -> 1.0
    @Test
    @IR(failOn = {IRNode.POW_D})
    @Arguments(values = {Argument.RANDOM_EACH})
    public static double expZero(double b) {
        return Math.pow(b, 0.0);
    }

    // Test 4: pow(b, 1.0) -> b (identity)
    @Test
    @IR(failOn = {IRNode.POW_D})
    @Arguments(values = {Argument.RANDOM_EACH})
    public static double expOne(double b) {
        return Math.pow(b, 1.0);
    }

    // Test 5: pow(b, NaN) -> NaN
    @Test
    @IR(failOn = {IRNode.POW_D})
    @Arguments(values = {Argument.RANDOM_EACH})
    public static double expNaN(double b) {
        return Math.pow(b, Double.NaN);
    }

    // Test 6: pow(b, 2.0) -> b * b
    // More tests in TestPow2Opt.java
    @Test
    @IR(failOn = {IRNode.POW_D})
    @IR(counts = {IRNode.MUL_D, "1"})
    @Arguments(values = {Argument.RANDOM_EACH})
    public static double expTwo(double b) {
        return Math.pow(b, 2.0);
    }

    // Test 7: pow(b, 0.5) -> b <= 0.0 ? pow(b, 0.5) : sqrt(b)
    // More tests in TestPow0Dot5Opt.java
    @Test
    @IR(counts = {IRNode.IF, "1"})
    @IR(counts = {IRNode.SQRT_D, "1"})
    @IR(counts = {".*CallLeaf.*pow.*", "1"}, phase = CompilePhase.BEFORE_MATCHING)
    @Arguments(values = {Argument.RANDOM_EACH})
    public static double expDot5(double b) {
        return Math.pow(b, 0.5); // expand to: if (b > 0) { sqrt(b) } else { call(b) }
    }

    // Test 8: non-constant exponent stays as call
    @Test
    @IR(counts = {IRNode.POW_D, "1"})
    @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH})
    public static double nonConstant(double b, double e) {
        return Math.pow(b, e);
    }

    // Test 9: late constant discovery on base (after loop opts)
    @Test
    @IR(counts = {IRNode.POW_D, "1"}, phase = CompilePhase.AFTER_PARSING)
    @IR(failOn = {IRNode.POW_D})
    public static double lateBaseConstant() {
        double base = 0;
        for (int i = 0; i < 4; i++) {
            if ((i % 2) == 0) {
                base = B;
            }
        }
        // After loop opts, base == B (constant), so pow(B, E) folds
        return Math.pow(base, E);
    }

    // Test 10: late constant discovery on exp (after loop opts)
    @Test
    @IR(counts = {IRNode.POW_D, "1"}, phase = CompilePhase.AFTER_PARSING)
    @IR(failOn = {IRNode.POW_D})
    public static double lateExpConstant() {
        double exp = 0;
        for (int i = 0; i < 4; i++) {
            if ((i % 2) == 0) {
                exp = E;
            }
        }
        // After loop opts, exp == E (constant), so pow(B, E) folds
        return Math.pow(B, exp);
    }

    // Test 11: late constant discoveries on both base and exp (after loop opts)
    @Test
    @IR(counts = {IRNode.POW_D, "1"}, phase = CompilePhase.AFTER_PARSING)
    @IR(failOn = {IRNode.POW_D})
    public static double lateBothConstant() {
        double base = 0, exp = 0;
        for (int i = 0; i < 4; i++) {
            if ((i % 2) == 0) {
                base = B;
                exp = E;
            }
        }
        // After loop opts, base == B, exp == E, so pow(B, E) folds
        return Math.pow(base, exp);
    }

    /**
     * Asserts that {@code observed} is within one ulp of {@code expected}.
     *
     * Two NaNs are considered equal, and so are two values that compare equal with {@code ==}
     * (same-signed infinities, +0.0 and -0.0). Any other combination involving a NaN or an
     * infinity is reported as a failure: every comparison against NaN is false and
     * {@code Math.ulp()} of an infinity is infinity, so the ulp check below could never fail
     * for such inputs on its own.
     *
     * @throws AssertionError if the two values differ by more than one ulp
     */
    private static void assertEQWithinOneUlp(double expected, double observed) {
        if (Double.isNaN(expected) && Double.isNaN(observed)) return;
        // Exact match. Also needed for equal infinities, whose difference is NaN.
        if (expected == observed) return;

        // Math.pow() requires result must be within 1 ulp of the respective magnitude
        double ulp = Math.max(Math.ulp(expected), Math.ulp(observed));
        boolean bothFinite = Double.isFinite(expected) && Double.isFinite(observed);
        if (!bothFinite || Math.abs(expected - observed) > ulp) {
            throw new AssertionError(String.format(
                "expect = %x, observed = %x, ulp = %x",
                Double.doubleToRawLongBits(expected), Double.doubleToRawLongBits(observed), Double.doubleToRawLongBits(ulp)
            ));
        }
    }

    // Compares the results of the test methods above against expected values for a mix of
    // special and randomly generated inputs.
    private static void testCorrectness() {
        // No need to warm up for intrinsics
        Asserts.assertEQ(1024.0d, constantLiteralFolding());

        double BE = StrictMath.pow(B, E);
        assertEQWithinOneUlp(BE, constantStaticFolding());
        assertEQWithinOneUlp(BE, lateBaseConstant());
        assertEQWithinOneUlp(BE, lateExpConstant());
        assertEQWithinOneUlp(BE, lateBothConstant());

        Generator<Double> anyBits = G.anyBitsDouble();
        Generator<Double> largeDoubles = G.uniformDoubles(Long.MAX_VALUE, Double.MAX_VALUE);
        Generator<Double> doubles = G.doubles();

        double[] values = {
            Double.MIN_VALUE, Double.MIN_NORMAL, -42.0d, -1.0d, -0.0d, +0.0d, 0.5d, 1.0d, 2.0d, 123d, Double.MAX_VALUE,
            Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN,
            UNIFORMS.next(), UNIFORMS.next(),
            largeDoubles.next(), -largeDoubles.next(), // some sufficiently large magnitudes
            anyBits.next(), anyBits.next(), // any bits with potentially more NaN representation
            doubles.next(), doubles.next() // a healthy sprinkle of whatever else is possible
        };

        for (double b : values) {
            // Strength reduced, so we know the bits match exactly
            Asserts.assertEQ(1.0d, expZero(b));
            Asserts.assertEQ(b, expOne(b));
            Asserts.assertEQ(b * b, expTwo(b));

            assertEQWithinOneUlp(Double.NaN, expNaN(b));

            // Runtime calls, so make sure the result is within 1 ulp
            assertEQWithinOneUlp(StrictMath.pow(b, 0.5d), expDot5(b));

            for (double e : values) {
                assertEQWithinOneUlp(StrictMath.pow(b, e), nonConstant(b, e));
            }
        }
    }
}

View File

@ -3134,6 +3134,12 @@ public class IRNode {
macroNodes(MOD_D, regex);
}
// IR node name for PowD, registered through macroNodes().
public static final String POW_D = PREFIX + "POW_D" + POSTFIX;
static {
    macroNodes(POW_D, START + "PowD" + MID + END);
}
public static final String BLACKHOLE = PREFIX + "BLACKHOLE" + POSTFIX;
static {
fromBeforeRemoveUselessToFinalCode(BLACKHOLE, "Blackhole");