8321003: RISC-V: C2 MulReductionVI

8321004: RISC-V: C2 MulReductionVL

Reviewed-by: fyang, rehn
This commit is contained in:
Hamlin Li 2025-02-21 10:25:50 +00:00
parent c73fead5ca
commit 1b6281d98c
6 changed files with 124 additions and 1 deletions

View File

@ -2954,6 +2954,45 @@ void C2_MacroAssembler::reduce_integral_v(Register dst, Register src1,
vmv_x_s(dst, tmp);
}
// Emit a multiply reduction over the integral lanes of src2 and fold the
// scalar src1 into the result:  dst = src1 * src2[0] * ... * src2[n - 1].
//
//   dst            - receives the scalar product.
//   src1           - scalar operand multiplied in at the very end.
//   src2           - vector whose lanes are multiplied together.
//   vtmp1, vtmp2   - vector scratch registers, overwritten here.
//   bt             - element type; only T_BYTE/T_SHORT/T_INT/T_LONG are accepted.
//   vector_length  - number of lanes in src2.
//   vm             - Assembler::unmasked, or a mask selector (mask taken from v0).
//
// The lanes are combined pairwise: the upper half is slid down on top of the
// lower half and multiplied, then the active length is halved, until a single
// lane remains in element 0 of vtmp1.
// NOTE(review): this halving scheme presumably relies on vector_length being a
// power of two (>= 2), and on every slide amount fitting the immediate field
// of vslidedown_vi - confirm against the callers.
void C2_MacroAssembler::reduce_mul_integral_v(Register dst, Register src1, VectorRegister src2,
                                              VectorRegister vtmp1, VectorRegister vtmp2,
                                              BasicType bt, uint vector_length, VectorMask vm) {
  assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
  vsetvli_helper(bt, vector_length);

  // Pick the register holding the lanes that take part in the first step.
  VectorRegister lanes = src2;
  if (vm != Assembler::unmasked) {
    // Vector API contract for `reduceLanes`: when no lane is selected the
    // operation-specific identity is returned, and for MUL that identity is
    // one. Merge a splat of 1 with src2 under the mask so that the merged
    // copy in vtmp2 can be reduced like an unmasked vector.
    vmv_v_i(vtmp1, 1);
    vmerge_vvm(vtmp2, vtmp1, src2); // vm == v0
    lanes = vtmp2;
  }

  // First step: fold the upper half of the lanes onto the lower half.
  const uint half = vector_length / 2;
  vslidedown_vi(vtmp1, lanes, half);
  vsetvli_helper(bt, half);
  vmul_vv(vtmp1, vtmp1, lanes);

  // Remaining steps: keep halving the partial products held in vtmp1.
  for (uint width = half / 2; width >= 1; width /= 2) {
    vslidedown_vi(vtmp2, vtmp1, width);
    vsetvli_helper(bt, width);
    vmul_vv(vtmp1, vtmp1, vtmp2);
  }

  // Move element 0 to the scalar register and multiply in src1; the 32-bit
  // multiply is used for T_INT, the full-width one for every other type.
  vmv_x_s(dst, vtmp1);
  if (bt != T_INT) {
    mul(dst, dst, src1);
  } else {
    mulw(dst, dst, src1);
  }
}
// Set vl and vtype for full and partial vector operations.
// (vma = mu, vta = tu, vill = false)
void C2_MacroAssembler::vsetvli_helper(BasicType bt, uint vector_length, LMUL vlmul, Register tmp) {

View File

@ -239,6 +239,10 @@
int opc, BasicType bt, uint vector_length,
VectorMask vm = Assembler::unmasked);
// Multiply reduction for integral element types: emits code that multiplies
// the lanes of src2 together with the scalar src1 and leaves the product in
// dst. vtmp1 and vtmp2 are vector scratch registers that get overwritten.
// bt is the element type and vector_length the number of lanes in src2.
// vm defaults to an unmasked reduction; for the masked form the mask is
// expected in v0.
void reduce_mul_integral_v(Register dst, Register src1, VectorRegister src2,
VectorRegister vtmp1, VectorRegister vtmp2, BasicType bt,
uint vector_length, VectorMask vm = Assembler::unmasked);
void vsetvli_helper(BasicType bt, uint vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0);
void compare_integral_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond,

View File

@ -2,6 +2,7 @@
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
// Copyright (c) 2023, 2025, Rivos Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -99,6 +100,12 @@ source %{
return false;
}
break;
case Op_MulReductionVI:
case Op_MulReductionVL:
// When vlen < 4, our log2(vlen) implementation does not help to gain performance improvement.
if (vlen < 4) {
return false;
}
default:
break;
}
@ -2427,6 +2434,67 @@ instruct vreduce_minD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, v
ins_pipe(pipe_slow);
%}
// ------------------------------ Vector reduction mul -------------------------
// Unmasked int multiply reduction: dst = isrc * (product of the lanes of vsrc).
// dst is TEMP_DEF and tmp1/tmp2 are TEMP: reduce_mul_integral_v overwrites both
// vector temporaries and writes dst before it reads isrc.
instruct reduce_mulI(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
                     vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVI isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulI $dst, $isrc, $vsrc\t" %}
  ins_encode %{
    // Element type and lane count are taken from the vector input.
    BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
    uint vlen = Matcher::vector_length(this, $vsrc);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register,
                             as_VectorRegister($vsrc$$reg),
                             as_VectorRegister($tmp1$$reg),
                             as_VectorRegister($tmp2$$reg),
                             bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}
// Masked int multiply reduction. The mask operand is pinned to v0 and the
// macro assembler routine is invoked with Assembler::v0_t.
// dst is TEMP_DEF and tmp1/tmp2 are TEMP: reduce_mul_integral_v overwrites both
// vector temporaries and writes dst before it reads isrc.
instruct reduce_mulI_masked(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
                            vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVI (Binary isrc vsrc) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulI_masked $dst, $isrc, $vsrc, $v0\t" %}
  ins_encode %{
    // Element type and lane count are taken from the vector input.
    BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
    uint vlen = Matcher::vector_length(this, $vsrc);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register,
                             as_VectorRegister($vsrc$$reg),
                             as_VectorRegister($tmp1$$reg),
                             as_VectorRegister($tmp2$$reg),
                             bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}
// Unmasked long multiply reduction: dst = isrc * (product of the lanes of vsrc).
// dst is TEMP_DEF and tmp1/tmp2 are TEMP: reduce_mul_integral_v overwrites both
// vector temporaries and writes dst before it reads isrc.
instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc,
                     vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVL isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulL $dst, $isrc, $vsrc\t" %}
  ins_encode %{
    // Element type and lane count are taken from the vector input.
    BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
    uint vlen = Matcher::vector_length(this, $vsrc);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register,
                             as_VectorRegister($vsrc$$reg),
                             as_VectorRegister($tmp1$$reg),
                             as_VectorRegister($tmp2$$reg),
                             bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}
// Masked long multiply reduction. The mask operand is pinned to v0 and the
// macro assembler routine is invoked with Assembler::v0_t.
// dst is TEMP_DEF and tmp1/tmp2 are TEMP: reduce_mul_integral_v overwrites both
// vector temporaries and writes dst before it reads isrc.
instruct reduce_mulL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc,
                            vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVL (Binary isrc vsrc) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulL_masked $dst, $isrc, $vsrc, $v0\t" %}
  ins_encode %{
    // Element type and lane count are taken from the vector input.
    BasicType bt = Matcher::vector_element_basic_type(this, $vsrc);
    uint vlen = Matcher::vector_length(this, $vsrc);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register,
                             as_VectorRegister($vsrc$$reg),
                             as_VectorRegister($tmp1$$reg),
                             as_VectorRegister($tmp2$$reg),
                             bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}
// vector replicate
instruct replicate(vReg dst, iRegIorL2I src) %{

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -85,6 +85,10 @@ public class ProdRed_Int {
@IR(applyIfCPUFeature = {"sse4.1", "true"},
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
@IR(applyIfPlatform = {"riscv64", "true"},
applyIfCPUFeature = {"rvv", "true"},
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
public static int prodReductionImplement(int[] a, int[] b, int total) {
for (int i = 0; i < a.length; i++) {
total *= a[i] + b[i];

View File

@ -219,6 +219,10 @@ public class RedTest_int {
@IR(applyIfCPUFeature = {"sse4.1", "true"},
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
@IR(applyIfPlatform = {"riscv64", "true"},
applyIfCPUFeature = {"rvv", "true"},
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
public static int mulReductionImplement(
int[] a,
int[] b,

View File

@ -226,6 +226,10 @@ public class RedTest_long {
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
applyIfPlatform = {"64-bit", "true"},
counts = {IRNode.MUL_REDUCTION_VL, ">= 1", IRNode.MUL_REDUCTION_VL, "<= 2"}) // one for main-loop, one for vector-post-loop
@IR(applyIfPlatform = {"riscv64", "true"},
applyIfCPUFeature = {"rvv", "true"},
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
counts = {IRNode.MUL_REDUCTION_VL, ">= 1", IRNode.MUL_REDUCTION_VL, "<= 2"}) // one for main-loop, one for vector-post-loop
public static long mulReductionImplement(
long[] a,
long[] b,