mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8361582: AArch64: Some ConH values cannot be replicated with SVE
Reviewed-by: shade, epeter, aph
This commit is contained in:
parent
fc77e7600f
commit
7f0cd6488b
@ -4412,10 +4412,9 @@ operand immI8()
|
||||
%}
|
||||
|
||||
// 8 bit signed value (simm8), or #simm8 LSL 8.
|
||||
operand immI8_shift8()
|
||||
operand immIDupV()
|
||||
%{
|
||||
predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
|
||||
(n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
|
||||
predicate(Assembler::operand_valid_for_sve_dup_immediate((int64_t)n->get_int()));
|
||||
match(ConI);
|
||||
|
||||
op_cost(0);
|
||||
@ -4424,10 +4423,9 @@ operand immI8_shift8()
|
||||
%}
|
||||
|
||||
// 8 bit signed value (simm8), or #simm8 LSL 8.
|
||||
operand immL8_shift8()
|
||||
operand immLDupV()
|
||||
%{
|
||||
predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
|
||||
(n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
|
||||
predicate(Assembler::operand_valid_for_sve_dup_immediate(n->get_long()));
|
||||
match(ConL);
|
||||
|
||||
op_cost(0);
|
||||
@ -4435,6 +4433,17 @@ operand immL8_shift8()
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// Half float constant whose raw 16-bit value is an 8 bit signed value (simm8), or #simm8 LSL 8.
|
||||
operand immHDupV()
|
||||
%{
|
||||
predicate(Assembler::operand_valid_for_sve_dup_immediate((int64_t)n->geth()));
|
||||
match(ConH);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// 8 bit integer valid for vector add sub immediate
|
||||
operand immBAddSubV()
|
||||
%{
|
||||
@ -7077,18 +7086,16 @@ instruct loadConD(vRegD dst, immD con) %{
|
||||
%}
|
||||
|
||||
// Load Half Float Constant
|
||||
// The "ldr" instruction loads a 32-bit word from the constant pool into a
|
||||
// 32-bit register but only the bottom half will be populated and the top
|
||||
// 16 bits are zero.
|
||||
instruct loadConH(vRegF dst, immH con) %{
|
||||
match(Set dst con);
|
||||
format %{
|
||||
"ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t"
|
||||
%}
|
||||
format %{ "mov rscratch1, $con\n\t"
|
||||
"fmov $dst, rscratch1"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
|
||||
__ movw(rscratch1, (uint32_t)$con$$constant);
|
||||
__ fmovs($dst$$FloatRegister, rscratch1);
|
||||
%}
|
||||
ins_pipe(fp_load_constant_s);
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Store Instructions
|
||||
|
||||
@ -4875,7 +4875,7 @@ instruct replicateB_imm8_gt128b(vReg dst, immI8 con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct replicateI_imm8_gt128b(vReg dst, immI8_shift8 con) %{
|
||||
instruct replicateI_imm8_gt128b(vReg dst, immIDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16 &&
|
||||
(Matcher::vector_element_basic_type(n) == T_SHORT ||
|
||||
Matcher::vector_element_basic_type(n) == T_INT));
|
||||
@ -4898,7 +4898,7 @@ instruct replicateL_imm_128b(vReg dst, immL con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{
|
||||
instruct replicateL_imm8_gt128b(vReg dst, immLDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateL_imm8_gt128b $dst, $con\t# vector > 128 bits" %}
|
||||
@ -4909,19 +4909,27 @@ instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Replicate a 16-bit half precision float value
|
||||
instruct replicateHF_imm(vReg dst, immH con) %{
|
||||
// Replicate an immediate 16-bit half precision float value
|
||||
instruct replicateHF_imm_le128b(vReg dst, immH con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) <= 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateHF_imm $dst, $con\t# replicate immediate half-precision float" %}
|
||||
format %{ "replicateHF_imm_le128b $dst, $con\t# vector <= 128 bits" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
int imm = (int)($con$$constant) & 0xffff;
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mov($dst$$FloatRegister, get_arrangement(this), imm);
|
||||
} else { // length_in_bytes must be > 16 and SVE should be enabled
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_dup($dst$$FloatRegister, __ H, imm);
|
||||
}
|
||||
__ mov($dst$$FloatRegister, get_arrangement(this), imm);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Replicate a 16-bit half precision float which is within the limits
|
||||
// for the operand - immHDupV
|
||||
instruct replicateHF_imm8_gt128b(vReg dst, immHDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateHF_imm8_gt128b $dst, $con\t# vector > 128 bits" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_dup($dst$$FloatRegister, __ H, (int)($con$$constant));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -3107,7 +3107,7 @@ instruct replicateB_imm8_gt128b(vReg dst, immI8 con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct replicateI_imm8_gt128b(vReg dst, immI8_shift8 con) %{
|
||||
instruct replicateI_imm8_gt128b(vReg dst, immIDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16 &&
|
||||
(Matcher::vector_element_basic_type(n) == T_SHORT ||
|
||||
Matcher::vector_element_basic_type(n) == T_INT));
|
||||
@ -3130,7 +3130,7 @@ instruct replicateL_imm_128b(vReg dst, immL con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{
|
||||
instruct replicateL_imm8_gt128b(vReg dst, immLDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateL_imm8_gt128b $dst, $con\t# vector > 128 bits" %}
|
||||
@ -3141,19 +3141,27 @@ instruct replicateL_imm8_gt128b(vReg dst, immL8_shift8 con) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Replicate a 16-bit half precision float value
|
||||
instruct replicateHF_imm(vReg dst, immH con) %{
|
||||
// Replicate an immediate 16-bit half precision float value
|
||||
instruct replicateHF_imm_le128b(vReg dst, immH con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) <= 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateHF_imm $dst, $con\t# replicate immediate half-precision float" %}
|
||||
format %{ "replicateHF_imm_le128b $dst, $con\t# vector <= 128 bits" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
int imm = (int)($con$$constant) & 0xffff;
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mov($dst$$FloatRegister, get_arrangement(this), imm);
|
||||
} else { // length_in_bytes must be > 16 and SVE should be enabled
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_dup($dst$$FloatRegister, __ H, imm);
|
||||
}
|
||||
__ mov($dst$$FloatRegister, get_arrangement(this), imm);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Replicate a 16-bit half precision float which is within the limits
|
||||
// for the operand - immHDupV
|
||||
instruct replicateHF_imm8_gt128b(vReg dst, immHDupV con) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) > 16);
|
||||
match(Set dst (Replicate con));
|
||||
format %{ "replicateHF_imm8_gt128b $dst, $con\t# vector > 128 bits" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_dup($dst$$FloatRegister, __ H, (int)($con$$constant));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -434,6 +434,11 @@ int Assembler::operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Returns true if imm is either a signed 8-bit value (simm8), or a
// multiple of 256 within [-32768, 32512] (i.e. simm8 shifted left by 8).
bool Assembler::operand_valid_for_sve_dup_immediate(int64_t imm) {
  // Plain simm8.
  if (imm >= -128 && imm <= 127) {
    return true;
  }
  // simm8, LSL #8: the low byte must be clear and the value must stay in range.
  const bool low_byte_clear = (imm & 0xff) == 0;
  return low_byte_clear && imm >= -32768 && imm <= 32512;
}
|
||||
|
||||
bool Assembler::operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm) {
|
||||
return encode_sve_logical_immediate(elembits, imm) != 0xffffffff;
|
||||
}
|
||||
|
||||
@ -4324,6 +4324,7 @@ public:
|
||||
static bool operand_valid_for_sve_add_sub_immediate(int64_t imm);
|
||||
static bool operand_valid_for_float_immediate(double imm);
|
||||
static int operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T);
|
||||
static bool operand_valid_for_sve_dup_immediate(int64_t imm);
|
||||
|
||||
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
|
||||
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
|
||||
|
||||
136
test/hotspot/jtreg/compiler/c2/aarch64/TestFloat16Replicate.java
Normal file
136
test/hotspot/jtreg/compiler/c2/aarch64/TestFloat16Replicate.java
Normal file
@ -0,0 +1,136 @@
|
||||
/* Copyright (c) 2025, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8361582
|
||||
* @summary Ensure the correct backend replicate node is being generated for
|
||||
* half precision float constants on >16B SVE machines
|
||||
* @modules jdk.incubator.vector
|
||||
* @library /test/lib /
|
||||
* @run main/othervm compiler.c2.aarch64.TestFloat16Replicate
|
||||
*/
|
||||
|
||||
package compiler.c2.aarch64;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import compiler.lib.verify.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import jdk.incubator.vector.Float16;
|
||||
import jdk.test.lib.*;
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
import static java.lang.Float.*;
|
||||
import static jdk.incubator.vector.Float16.*;
|
||||
|
||||
public class TestFloat16Replicate {
|
||||
private static short[] input;
|
||||
private static short[] output;
|
||||
private static short[] expected;
|
||||
private static Random rnd;
|
||||
|
||||
// Choose FP16_IMM8 which is within the range of [-128 << 8, 127 << 8] and a multiple of 256
|
||||
private static final Float16 FP16_IMM8;
|
||||
|
||||
// Choose a value in the range [-128 << 8, 127 << 8] and a non multiple of 256 for FP16_NON_IMM8
|
||||
private static final Float16 FP16_NON_IMM8;
|
||||
|
||||
private static final int LEN = 1024;
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:-TieredCompilation");
|
||||
}
|
||||
|
||||
static {
|
||||
rnd = Utils.getRandomInstance();
|
||||
int k = rnd.nextInt(-128, 128);
|
||||
int b = rnd.nextInt(1, 256);
|
||||
short bits_imm8 = (short) (k << 8);
|
||||
short bits_non_imm8 = (short) ((k << 8) + b);
|
||||
|
||||
FP16_IMM8 = Float16.shortBitsToFloat16(bits_imm8);
|
||||
FP16_NON_IMM8 = Float16.shortBitsToFloat16(bits_non_imm8);
|
||||
|
||||
input = new short[LEN];
|
||||
output = new short[LEN];
|
||||
expected = new short[LEN];
|
||||
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
input[i] = (short) i;
|
||||
}
|
||||
}
|
||||
|
||||
// For vectorizable loops containing FP16 operations with an FP16 constant as one of the inputs, the IR
|
||||
// node `(dst (Replicate con))` is generated to broadcast the constant into all lanes of an SVE register.
|
||||
// On SVE-capable hardware with vector length > 16B, if the FP16 immediate is a signed value within the
|
||||
// range [-128, 127] or a signed multiple of 256 in the range [-32768, 32512] for element widths of
|
||||
// 16 bits or higher then the backend should generate the "replicateHF_imm_gt128b" machnode.
|
||||
@Test
|
||||
@Warmup(5000)
|
||||
@IR(counts = {IRNode.REPLICATE_HF_IMM8, ">0"},
|
||||
phase = CompilePhase.FINAL_CODE,
|
||||
applyIf = {"MaxVectorSize", ">16"},
|
||||
applyIfCPUFeature = {"sve", "true"})
|
||||
public void TestFloat16AddInRange() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
output[i] = float16ToRawShortBits(Float16.add(shortBitsToFloat16(input[i]), FP16_IMM8));
|
||||
}
|
||||
}
|
||||
|
||||
@Check(test="TestFloat16AddInRange")
|
||||
public void checkResultFloat16AddInRange() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
expected[i] = floatToFloat16(float16ToFloat(input[i]) + FP16_IMM8.floatValue());
|
||||
}
|
||||
Verify.checkEQWithRawBits(output, expected);
|
||||
}
|
||||
|
||||
// For vectorizable loops containing FP16 operations with an FP16 constant as one of the inputs, the IR
|
||||
// node `(dst (Replicate con))` is generated to broadcast the constant into all lanes of an SVE register.
|
||||
// On SVE-capable hardware with vector length > 16B, if the FP16 constant falls outside the immediate
|
||||
// range accepted by the SVE "dup" instruction, the backend must:
|
||||
// 1. Generate the "loadConH" machnode to load the FP16 constant from the constant pool.
|
||||
// 2. Emit the "replicateHF" machnode to broadcast this loaded constant into an SVE register.
|
||||
// In this case, the backend should not generate the "replicateHF_imm8_gt128b" machnode.
|
||||
@Test
|
||||
@Warmup(5000)
|
||||
@IR(counts = {IRNode.REPLICATE_HF, ">0"},
|
||||
failOn = {IRNode.REPLICATE_HF_IMM8},
|
||||
phase = CompilePhase.FINAL_CODE,
|
||||
applyIf = {"MaxVectorSize", ">16"},
|
||||
applyIfCPUFeature = {"sve", "true"})
|
||||
public void TestFloat16AddOutOfRange() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
output[i] = float16ToRawShortBits(add(shortBitsToFloat16(input[i]), FP16_NON_IMM8));
|
||||
}
|
||||
}
|
||||
|
||||
@Check(test="TestFloat16AddOutOfRange")
|
||||
public void checkResultFloat16AddOutOfRange() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
expected[i] = floatToFloat16(float16ToFloat(input[i]) + FP16_NON_IMM8.floatValue());
|
||||
}
|
||||
Verify.checkEQWithRawBits(output, expected);
|
||||
}
|
||||
}
|
||||
@ -2896,6 +2896,16 @@ public class IRNode {
|
||||
vectorNode(SELECT_FROM_TWO_VECTOR_VL, "SelectFromTwoVector", TYPE_LONG);
|
||||
}
|
||||
|
||||
public static final String REPLICATE_HF = PREFIX + "REPLICATE_HF" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(REPLICATE_HF, "replicateHF");
|
||||
}
|
||||
|
||||
public static final String REPLICATE_HF_IMM8 = PREFIX + "REPLICATE_HF_IMM8" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(REPLICATE_HF_IMM8, "replicateHF_imm8_gt128b");
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility methods to set up IR_NODE_MAPPINGS.
|
||||
*/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user