From 97ade57fb244b17e93b150b7f9e025a5ba906bb2 Mon Sep 17 00:00:00 2001 From: Dingli Zhang Date: Fri, 19 May 2023 03:09:13 +0000 Subject: [PATCH] 8307609: RISC-V: Added support for Extract, Compress, Expand and other nodes for Vector API Co-authored-by: zifeihan Reviewed-by: fyang, fjiang --- src/hotspot/cpu/riscv/assembler_riscv.hpp | 27 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 171 +- .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 26 +- .../cpu/riscv/macroAssembler_riscv.hpp | 8 +- src/hotspot/cpu/riscv/riscv.ad | 79 - src/hotspot/cpu/riscv/riscv_v.ad | 1521 +++++++++++++++-- 6 files changed, 1570 insertions(+), 262 deletions(-) diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index c44fb8bee5a..bb7e2f5021f 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1311,6 +1311,9 @@ enum VectorMask { INSN(vsrl_vi, 0b1010111, 0b011, 0b101000); INSN(vsll_vi, 0b1010111, 0b011, 0b100101); + // Vector Slide Instructions + INSN(vslidedown_vi, 0b1010111, 0b011, 0b001111); + #undef INSN #define INSN(NAME, op, funct3, funct6) \ @@ -1511,6 +1514,9 @@ enum VectorMask { INSN(vadd_vx, 0b1010111, 0b100, 0b000000); INSN(vrsub_vx, 0b1010111, 0b100, 0b000011); + // Vector Slide Instructions + INSN(vslidedown_vx, 0b1010111, 0b100, 0b001111); + #undef INSN #define INSN(NAME, op, funct3, vm, funct6) \ @@ -1523,6 +1529,16 @@ enum VectorMask { #undef INSN +#define INSN(NAME, op, funct3, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \ + } + + // Vector Floating-Point Merge Instruction + INSN(vfmerge_vfm, 0b1010111, 0b101, 0b0, 0b010111); + +#undef INSN + #define INSN(NAME, op, funct3, funct6) \ void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \ patch_VArith(op, Vd, funct3, Rs1->raw_encoding(), Vs2, vm, funct6); \ @@ -1761,16 
+1777,11 @@ enum Nf { } // Vector unordered indexed load instructions - INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); - INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0); INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); - INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0); - // Vector ordered indexed load instructions - INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0); - INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0); - INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0); - INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0); + // Vector unordered indexed store instructions + INSN(vsuxei32_v, 0b0100111, 0b110, 0b01, 0b0); + #undef INSN #define INSN(NAME, op, width, mop, mew) \ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index ed18cd90c39..742b468ca62 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1639,33 +1639,65 @@ void C2_MacroAssembler::minmax_fp_v(VectorRegister dst, VectorRegister src1, Vec vfadd_vv(dst, src2, src2, Assembler::v0_t); } +// Set dst to NaN if any NaN input. +// The destination vector register elements corresponding to masked-off elements +// are handled with a mask-undisturbed policy. +void C2_MacroAssembler::minmax_fp_masked_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, + VectorRegister vmask, VectorRegister tmp1, VectorRegister tmp2, + bool is_double, bool is_min, int vector_length) { + assert_different_registers(src1, src2, tmp1, tmp2); + vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length); + + // Check vector elements of src1 and src2 for NaN. + vmfeq_vv(tmp1, src1, src1); + vmfeq_vv(tmp2, src2, src2); + + vmandn_mm(v0, vmask, tmp1); + vfadd_vv(dst, src1, src1, Assembler::v0_t); + vmandn_mm(v0, vmask, tmp2); + vfadd_vv(dst, src2, src2, Assembler::v0_t); + + vmand_mm(tmp2, tmp1, tmp2); + vmand_mm(v0, vmask, tmp2); + is_min ? 
vfmin_vv(dst, src1, src2, Assembler::v0_t) + : vfmax_vv(dst, src1, src2, Assembler::v0_t); +} + // Set dst to NaN if any NaN input. void C2_MacroAssembler::reduce_minmax_fp_v(FloatRegister dst, FloatRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, - bool is_double, bool is_min, int vector_length) { + bool is_double, bool is_min, int vector_length, VectorMask vm) { + assert_different_registers(dst, src1); assert_different_registers(src2, tmp1, tmp2); - Label L_done, L_NaN; + Label L_done, L_NaN_1, L_NaN_2; + // Set dst to src1 if src1 is NaN + is_double ? feq_d(t0, src1, src1) + : feq_s(t0, src1, src1); + beqz(t0, L_NaN_2); + vsetvli_helper(is_double ? T_DOUBLE : T_FLOAT, vector_length); vfmv_s_f(tmp2, src1); - is_min ? vfredmin_vs(tmp1, src2, tmp2) - : vfredmax_vs(tmp1, src2, tmp2); + is_min ? vfredmin_vs(tmp1, src2, tmp2, vm) + : vfredmax_vs(tmp1, src2, tmp2, vm); + vfmv_f_s(dst, tmp1); - fsflags(zr); - // Checking NaNs - vmflt_vf(tmp2, src2, src1); - frflags(t0); - bnez(t0, L_NaN); + // Checking NaNs in src2 + vmfne_vv(tmp1, src2, src2, vm); + vcpop_m(t0, tmp1, vm); + beqz(t0, L_done); + + bind(L_NaN_1); + vfredusum_vs(tmp1, src2, tmp2, vm); + vfmv_f_s(dst, tmp1); j(L_done); - bind(L_NaN); - vfmv_s_f(tmp2, src1); - vfredusum_vs(tmp1, src2, tmp2); - + bind(L_NaN_2); + is_double ? 
fmv_d(dst, src1) + : fmv_s(dst, src1); bind(L_done); - vfmv_f_s(dst, tmp1); } bool C2_MacroAssembler::in_scratch_emit_size() { @@ -1678,39 +1710,35 @@ bool C2_MacroAssembler::in_scratch_emit_size() { return MacroAssembler::in_scratch_emit_size(); } -void C2_MacroAssembler::reduce_integral_v(Register dst, VectorRegister tmp, - Register src1, VectorRegister src2, - BasicType bt, int opc, int vector_length) { +void C2_MacroAssembler::reduce_integral_v(Register dst, Register src1, + VectorRegister src2, VectorRegister tmp, + int opc, BasicType bt, int vector_length, VectorMask vm) { assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type"); - vsetvli_helper(bt, vector_length); - vmv_s_x(tmp, src1); - switch (opc) { case Op_AddReductionVI: case Op_AddReductionVL: - vredsum_vs(tmp, src2, tmp); + vredsum_vs(tmp, src2, tmp, vm); break; case Op_AndReductionV: - vredand_vs(tmp, src2, tmp); + vredand_vs(tmp, src2, tmp, vm); break; case Op_OrReductionV: - vredor_vs(tmp, src2, tmp); + vredor_vs(tmp, src2, tmp, vm); break; case Op_XorReductionV: - vredxor_vs(tmp, src2, tmp); + vredxor_vs(tmp, src2, tmp, vm); break; case Op_MaxReductionV: - vredmax_vs(tmp, src2, tmp); + vredmax_vs(tmp, src2, tmp, vm); break; case Op_MinReductionV: - vredmin_vs(tmp, src2, tmp); + vredmin_vs(tmp, src2, tmp, vm); break; default: ShouldNotReachHere(); } - vmv_x_s(dst, tmp); } @@ -1728,8 +1756,8 @@ void C2_MacroAssembler::vsetvli_helper(BasicType bt, int vector_length, LMUL vlm } } -void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int vector_length, - VectorRegister src1, VectorRegister src2, int cond, VectorMask vm) { +void C2_MacroAssembler::compare_integral_v(VectorRegister vd, VectorRegister src1, VectorRegister src2, + int cond, BasicType bt, int vector_length, VectorMask vm) { assert(is_integral_type(bt), "unsupported element type"); assert(vm == Assembler::v0_t ? 
vd != v0 : true, "should be different registers"); vsetvli_helper(bt, vector_length); @@ -1747,42 +1775,19 @@ void C2_MacroAssembler::compare_integral_v(VectorRegister vd, BasicType bt, int } } -void C2_MacroAssembler::compare_floating_point_v(VectorRegister vd, BasicType bt, int vector_length, - VectorRegister src1, VectorRegister src2, - VectorRegister tmp1, VectorRegister tmp2, - VectorRegister vmask, int cond, VectorMask vm) { +void C2_MacroAssembler::compare_fp_v(VectorRegister vd, VectorRegister src1, VectorRegister src2, + int cond, BasicType bt, int vector_length, VectorMask vm) { assert(is_floating_point_type(bt), "unsupported element type"); - assert(vd != v0, "should be different registers"); - assert(vm == Assembler::v0_t ? vmask != v0 : true, "vmask should not be v0"); + assert(vm == Assembler::v0_t ? vd != v0 : true, "should be different registers"); vsetvli_helper(bt, vector_length); - // Check vector elements of src1 and src2 for quiet and signaling NaN. - vfclass_v(tmp1, src1); - vfclass_v(tmp2, src2); - vsrl_vi(tmp1, tmp1, 8); - vsrl_vi(tmp2, tmp2, 8); - vmseq_vx(tmp1, tmp1, zr); - vmseq_vx(tmp2, tmp2, zr); - if (vm == Assembler::v0_t) { - vmand_mm(tmp2, tmp1, tmp2); - if (cond == BoolTest::ne) { - vmandn_mm(tmp1, vmask, tmp2); - } - vmand_mm(v0, vmask, tmp2); - } else { - vmand_mm(v0, tmp1, tmp2); - if (cond == BoolTest::ne) { - vmnot_m(tmp1, v0); - } - } vmclr_m(vd); switch (cond) { - case BoolTest::eq: vmfeq_vv(vd, src1, src2, Assembler::v0_t); break; - case BoolTest::ne: vmfne_vv(vd, src1, src2, Assembler::v0_t); - vmor_mm(vd, vd, tmp1); break; - case BoolTest::le: vmfle_vv(vd, src1, src2, Assembler::v0_t); break; - case BoolTest::ge: vmfge_vv(vd, src1, src2, Assembler::v0_t); break; - case BoolTest::lt: vmflt_vv(vd, src1, src2, Assembler::v0_t); break; - case BoolTest::gt: vmfgt_vv(vd, src1, src2, Assembler::v0_t); break; + case BoolTest::eq: vmfeq_vv(vd, src1, src2, vm); break; + case BoolTest::ne: vmfne_vv(vd, src1, src2, vm); break; + case 
BoolTest::le: vmfle_vv(vd, src1, src2, vm); break; + case BoolTest::ge: vmfge_vv(vd, src1, src2, vm); break; + case BoolTest::lt: vmflt_vv(vd, src1, src2, vm); break; + case BoolTest::gt: vmfgt_vv(vd, src1, src2, vm); break; default: assert(false, "unsupported compare condition"); ShouldNotReachHere(); @@ -1863,10 +1868,8 @@ void C2_MacroAssembler::integer_narrow_v(VectorRegister dst, BasicType dst_bt, i #define VFCVT_SAFE(VFLOATCVT) \ void C2_MacroAssembler::VFLOATCVT##_safe(VectorRegister dst, VectorRegister src) { \ assert_different_registers(dst, src); \ - vfclass_v(v0, src); \ vxor_vv(dst, dst, dst); \ - vsrl_vi(v0, v0, 8); \ - vmseq_vx(v0, v0, zr); \ + vmfeq_vv(v0, src, src); \ VFLOATCVT(dst, src, Assembler::v0_t); \ } @@ -1875,3 +1878,43 @@ VFCVT_SAFE(vfwcvt_rtz_x_f_v); VFCVT_SAFE(vfncvt_rtz_x_f_w); #undef VFCVT_SAFE + +// Extract a scalar element from an vector at position 'idx'. +// The input elements in src are expected to be of integral type. +void C2_MacroAssembler::extract_v(Register dst, VectorRegister src, BasicType bt, + int idx, VectorRegister tmp) { + assert(is_integral_type(bt), "unsupported element type"); + assert(idx >= 0, "idx cannot be negative"); + // Only need the first element after vector slidedown + vsetvli_helper(bt, 1); + if (idx == 0) { + vmv_x_s(dst, src); + } else if (idx <= 31) { + vslidedown_vi(tmp, src, idx); + vmv_x_s(dst, tmp); + } else { + mv(t0, idx); + vslidedown_vx(tmp, src, t0); + vmv_x_s(dst, tmp); + } +} + +// Extract a scalar element from an vector at position 'idx'. +// The input elements in src are expected to be of floating point type. 
+void C2_MacroAssembler::extract_fp_v(FloatRegister dst, VectorRegister src, BasicType bt, + int idx, VectorRegister tmp) { + assert(is_floating_point_type(bt), "unsupported element type"); + assert(idx >= 0, "idx cannot be negative"); + // Only need the first element after vector slidedown + vsetvli_helper(bt, 1); + if (idx == 0) { + vfmv_f_s(dst, src); + } else if (idx <= 31) { + vslidedown_vi(tmp, src, idx); + vfmv_f_s(dst, tmp); + } else { + mv(t0, idx); + vslidedown_vx(tmp, src, t0); + vfmv_f_s(dst, tmp); + } +} \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 71e3af91964..aabfb8504bd 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -189,23 +189,28 @@ VectorRegister src1, VectorRegister src2, bool is_double, bool is_min, int vector_length); + void minmax_fp_masked_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, + VectorRegister vmask, VectorRegister tmp1, VectorRegister tmp2, + bool is_double, bool is_min, int vector_length); + void reduce_minmax_fp_v(FloatRegister dst, FloatRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, - bool is_double, bool is_min, int vector_length); + bool is_double, bool is_min, int vector_length, + VectorMask vm = Assembler::unmasked); - void reduce_integral_v(Register dst, VectorRegister tmp, - Register src1, VectorRegister src2, - BasicType bt, int opc, int vector_length); + void reduce_integral_v(Register dst, Register src1, + VectorRegister src2, VectorRegister tmp, + int opc, BasicType bt, int vector_length, + VectorMask vm = Assembler::unmasked); void vsetvli_helper(BasicType bt, int vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0); - void compare_integral_v(VectorRegister dst, BasicType bt, int vector_length, - VectorRegister src1, VectorRegister src2, int cond, VectorMask vm = Assembler::unmasked); + 
void compare_integral_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond, + BasicType bt, int vector_length, VectorMask vm = Assembler::unmasked); - void compare_floating_point_v(VectorRegister dst, BasicType bt, int vector_length, - VectorRegister src1, VectorRegister src2, VectorRegister tmp1, VectorRegister tmp2, - VectorRegister vmask, int cond, VectorMask vm = Assembler::unmasked); + void compare_fp_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond, + BasicType bt, int vector_length, VectorMask vm = Assembler::unmasked); // In Matcher::scalable_predicate_reg_slots, // we assume each predicate register is one-eighth of the size of @@ -240,4 +245,7 @@ void vfwcvt_rtz_x_f_v_safe(VectorRegister dst, VectorRegister src); void vfncvt_rtz_x_f_w_safe(VectorRegister dst, VectorRegister src); + void extract_v(Register dst, VectorRegister src, BasicType bt, int idx, VectorRegister tmp); + void extract_fp_v(FloatRegister dst, VectorRegister src, BasicType bt, int idx, VectorRegister tmp); + #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 9ae5eb679d5..1103edc7b83 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1273,12 +1273,12 @@ public: vnsrl_wx(vd, vs, x0, vm); } - inline void vneg_v(VectorRegister vd, VectorRegister vs) { - vrsub_vx(vd, vs, x0); + inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) { + vrsub_vx(vd, vs, x0, vm); } - inline void vfneg_v(VectorRegister vd, VectorRegister vs) { - vfsgnjn_vv(vd, vs, vs); + inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) { + vfsgnjn_vv(vd, vs, vs, vm); } inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) { diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad 
index 38aa5c0ebff..2c6b1b55142 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1887,85 +1887,6 @@ const bool Matcher::match_rule_supported(int opcode) { return true; // Per default match rules are supported. } -const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { - return match_rule_supported_vector(opcode, vlen, bt); -} - -// Identify extra cases that we might want to provide match rules for vector nodes and -// other intrinsics guarded with vector length (vlen) and element type (bt). -const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { - return false; - } - - return op_vec_supported(opcode); -} - -const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { - if (!UseRVV) { - return false; - } - switch (opcode) { - case Op_AddVB: - case Op_AddVS: - case Op_AddVI: - case Op_AddVL: - case Op_AddVF: - case Op_AddVD: - case Op_SubVB: - case Op_SubVS: - case Op_SubVI: - case Op_SubVL: - case Op_SubVF: - case Op_SubVD: - case Op_MulVB: - case Op_MulVS: - case Op_MulVI: - case Op_MulVL: - case Op_MulVF: - case Op_MulVD: - case Op_DivVF: - case Op_DivVD: - case Op_VectorLoadMask: - case Op_VectorMaskCmp: - case Op_AndVMask: - case Op_XorVMask: - case Op_OrVMask: - case Op_RShiftVB: - case Op_RShiftVS: - case Op_RShiftVI: - case Op_RShiftVL: - case Op_LShiftVB: - case Op_LShiftVS: - case Op_LShiftVI: - case Op_LShiftVL: - case Op_URShiftVB: - case Op_URShiftVS: - case Op_URShiftVI: - case Op_URShiftVL: - case Op_VectorBlend: - case Op_VectorReinterpret: - break; - case Op_LoadVector: - opcode = Op_LoadVectorMasked; - break; - case Op_StoreVector: - opcode = Op_StoreVectorMasked; - break; - default: - return false; - } - return match_rule_supported_vector(opcode, vlen, bt); -} - -const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) 
{ - return false; -} - -const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) { - return false; -} - const RegMask* Matcher::predicate_reg_mask(void) { return &_VMASK_REG_mask; } diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index bf631034e25..46cbe56df44 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -28,10 +28,6 @@ opclass vmemA(indirect); -source_hpp %{ - bool op_vec_supported(int opcode); -%} - source %{ static void loadStore(C2_MacroAssembler masm, bool is_store, @@ -50,36 +46,53 @@ source %{ } } - bool op_vec_supported(int opcode) { - switch (opcode) { - // No multiply reduction instructions - case Op_MulReductionVD: - case Op_MulReductionVF: - case Op_MulReductionVI: - case Op_MulReductionVL: - // Others - case Op_Extract: - case Op_ExtractB: - case Op_ExtractC: - case Op_ExtractD: - case Op_ExtractF: - case Op_ExtractI: - case Op_ExtractL: - case Op_ExtractS: - case Op_ExtractUB: - // Vector API specific - case Op_LoadVectorGather: - case Op_StoreVectorScatter: - case Op_VectorInsert: - case Op_VectorTest: - case Op_PopCountVI: - case Op_PopCountVL: - return false; - default: - return UseRVV; - } + const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { + return match_rule_supported_vector(opcode, vlen, bt); } + // Identify extra cases that we might want to provide match rules for vector nodes + // and other intrinsics guarded with vector length (vlen) and element type (bt). 
+ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + if (!UseRVV) { + return false; + } + + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + return false; + } + + switch (opcode) { + case Op_VectorMaskLastTrue: + if (!UseZbb || vlen > XLEN) { + return false; + } + break; + case Op_VectorMaskToLong: + case Op_VectorLongToMask: + if (vlen > XLEN) { + return false; + } + break; + default: + break; + } + return true; + } + + const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { + if (!UseRVV) { + return false; + } + return match_rule_supported_vector(opcode, vlen, bt); + } + + const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { + return false; + } + + const bool Matcher::vector_needs_load_shuffle(BasicType elem_bt, int vlen) { + return false; + } %} definitions %{ @@ -161,8 +174,9 @@ instruct vmaskcmp(vRegMask dst, vReg src1, vReg src2, immI cond) %{ ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint vector_length = Matcher::vector_length(this); - __ compare_integral_v(as_VectorRegister($dst$$reg), bt, vector_length, as_VectorRegister($src1$$reg), - as_VectorRegister($src2$$reg), (int)($cond$$constant)); + __ compare_integral_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + (int)($cond$$constant), bt, vector_length); %} ins_pipe(pipe_slow); %} @@ -178,44 +192,43 @@ instruct vmaskcmp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint vector_length = Matcher::vector_length(this); - __ compare_integral_v(as_VectorRegister($dst$$reg), bt, vector_length, as_VectorRegister($src1$$reg), - as_VectorRegister($src2$$reg), (int)($cond$$constant), Assembler::v0_t); + __ compare_integral_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + 
(int)($cond$$constant), bt, vector_length, Assembler::v0_t); %} ins_pipe(pipe_slow); %} // vector mask float compare -instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ +instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); - effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0); - format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond\t# KILL $tmp1, $tmp2" %} + effect(TEMP_DEF dst); + format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); uint vector_length = Matcher::vector_length(this); - __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, vector_length, - as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), - as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), - as_VectorRegister($v0$$reg), (int)($cond$$constant)); + __ compare_fp_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + (int)($cond$$constant), bt, vector_length); %} ins_pipe(pipe_slow); %} -instruct vmaskcmp_fp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ +instruct vmaskcmp_fp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT || Matcher::vector_element_basic_type(n) == T_DOUBLE); - match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond vmask))); - effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP v0); - format %{ "vmaskcmp_fp_masked $dst, $src1, $src2, $cond, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond v0))); + effect(TEMP_DEF dst); + format %{ "vmaskcmp_fp_masked $dst, $src1, $src2, $cond, $v0" %} ins_encode %{ BasicType 
bt = Matcher::vector_element_basic_type(this); uint vector_length = Matcher::vector_length(this); - __ compare_floating_point_v(as_VectorRegister($dst$$reg), bt, vector_length, - as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), - as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), - as_VectorRegister($vmask$$reg), (int)($cond$$constant), Assembler::v0_t); + __ compare_fp_v(as_VectorRegister($dst$$reg), + as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), + (int)($cond$$constant), bt, vector_length, Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -300,6 +313,41 @@ instruct vabsD(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} +// vector abs - predicated + +instruct vabs_masked(vReg dst_src, vRegMask_V0 v0, vReg tmp) %{ + match(Set dst_src (AbsVB dst_src v0)); + match(Set dst_src (AbsVS dst_src v0)); + match(Set dst_src (AbsVI dst_src v0)); + match(Set dst_src (AbsVL dst_src v0)); + ins_cost(VEC_COST); + effect(TEMP tmp); + format %{ "vabs_masked $dst_src, $dst_src, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vrsub_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($dst_src$$reg), 0, + Assembler::v0_t); + __ vmax_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($tmp$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vabs_fp_masked(vReg dst_src, vRegMask_V0 v0) %{ + match(Set dst_src (AbsVF dst_src v0)); + match(Set dst_src (AbsVD dst_src v0)); + ins_cost(VEC_COST); + format %{ "vabs_fp_masked $dst_src, $dst_src, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfsgnjx_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector add instruct vaddB(vReg dst, vReg src1, 
vReg src2) %{ @@ -430,6 +478,22 @@ instruct vand(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector and - predicated + +instruct vand_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (AndV (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vand_masked $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vand_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector or instruct vor(vReg dst, vReg src1, vReg src2) %{ @@ -446,6 +510,22 @@ instruct vor(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector or - predicated + +instruct vor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (OrV (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vor_masked $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vor_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector xor instruct vxor(vReg dst, vReg src1, vReg src2) %{ @@ -462,6 +542,22 @@ instruct vxor(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector xor - predicated + +instruct vxor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + match(Set dst_src1 (XorV (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vxor_masked $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vxor_vv(as_VectorRegister($dst_src1$$reg), + as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // 
vector float div instruct vdivF(vReg dst, vReg src1, vReg src2) %{ @@ -539,12 +635,44 @@ instruct vmin(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector integer max/min - predicated + +instruct vmax_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && + Matcher::vector_element_basic_type(n) != T_DOUBLE); + match(Set dst_src1 (MaxV (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vmax_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmax_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vmin_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) != T_FLOAT && + Matcher::vector_element_basic_type(n) != T_DOUBLE); + match(Set dst_src1 (MinV (Binary dst_src1 src2) v0)); + ins_cost(VEC_COST); + format %{ "vmin_masked $dst_src1, $dst_src1, $src2, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmin_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector float-point max/min -instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ +instruct vmaxF(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); match(Set dst (MaxV src1 src2)); - effect(TEMP_DEF dst); + effect(TEMP_DEF dst, TEMP v0); ins_cost(VEC_COST); format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} ins_encode %{ @@ -555,10 +683,10 @@ instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} -instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ +instruct vmaxD(vReg 
dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (MaxV src1 src2)); - effect(TEMP_DEF dst); + effect(TEMP_DEF dst, TEMP v0); ins_cost(VEC_COST); format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} ins_encode %{ @@ -569,10 +697,10 @@ instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} -instruct vminF(vReg dst, vReg src1, vReg src2) %{ +instruct vminF(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); match(Set dst (MinV src1 src2)); - effect(TEMP_DEF dst); + effect(TEMP_DEF dst, TEMP v0); ins_cost(VEC_COST); format %{ "vminF $dst, $src1, $src2\t#@vminF" %} ins_encode %{ @@ -583,10 +711,10 @@ instruct vminF(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} -instruct vminD(vReg dst, vReg src1, vReg src2) %{ +instruct vminD(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{ predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); match(Set dst (MinV src1 src2)); - effect(TEMP_DEF dst); + effect(TEMP_DEF dst, TEMP v0); ins_cost(VEC_COST); format %{ "vminD $dst, $src1, $src2\t#@vminD" %} ins_encode %{ @@ -597,6 +725,68 @@ instruct vminD(vReg dst, vReg src1, vReg src2) %{ ins_pipe(pipe_slow); %} +// vector float-point max/min - predicated + +instruct vmaxF_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); + match(Set dst_src1 (MaxV (Binary dst_src1 src2) vmask)); + effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); + ins_cost(VEC_COST); + format %{ "vmaxF_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + ins_encode %{ + __ minmax_fp_masked_v(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($vmask$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, false /* 
is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmaxD_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); + match(Set dst_src1 (MaxV (Binary dst_src1 src2) vmask)); + effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); + ins_cost(VEC_COST); + format %{ "vmaxD_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + ins_encode %{ + __ minmax_fp_masked_v(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($vmask$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, false /* is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + +instruct vminF_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); + match(Set dst_src1 (MinV (Binary dst_src1 src2) vmask)); + effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); + ins_cost(VEC_COST); + format %{ "vminF_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + ins_encode %{ + __ minmax_fp_masked_v(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($vmask$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, true /* is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + +instruct vminD_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{ + predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); + match(Set dst_src1 (MinV (Binary dst_src1 src2) vmask)); + effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0); + ins_cost(VEC_COST); + format %{ "vminD_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %} + ins_encode %{ + 
__ minmax_fp_masked_v(as_VectorRegister($dst_src1$$reg), as_VectorRegister($dst_src1$$reg), + as_VectorRegister($src2$$reg), as_VectorRegister($vmask$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, true /* is_min */, Matcher::vector_length(this)); + %} + ins_pipe(pipe_slow); +%} + // vector fmla // dst_src1 = dst_src1 + src2 * src3 @@ -627,6 +817,23 @@ instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector fmadd - predicated +// dst_src1 = dst_src1 * src2 + src3 + +instruct vfmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 v0))); + match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 v0))); + format %{ "vfmadd_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfmadd_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector fmls // dst_src1 = dst_src1 + -src2 * src3 @@ -661,6 +868,23 @@ instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector fnmsub - predicated + +// dst_src1 = dst_src1 * -src2 + src3 +instruct vfnmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 v0))); + match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 v0))); + format %{ "vfnmsub_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfnmsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // 
vector fnmla // dst_src1 = -dst_src1 + -src2 * src3 @@ -695,6 +919,23 @@ instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector fnmadd - predicated + +// dst_src1 = -src3 + dst_src1 * -src2 +instruct vfnmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) v0))); + match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) v0))); + format %{ "vfnmadd_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfnmadd_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector fnmls // dst_src1 = -dst_src1 + src2 * src3 @@ -725,6 +966,23 @@ instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector vfmsub - predicated + +// dst_src1 = -src3 + dst_src1 * src2 +instruct vfmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + predicate(UseFMA); + match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) v0))); + match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) v0))); + format %{ "vfmsub_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfmsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector mla // dst_src1 = dst_src1 + src2 * src3 @@ -779,6 +1037,23 @@ instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector mla - predicated + +instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + match(Set dst_src1 (AddVB (Binary 
dst_src1 (MulVB src2 src3)) v0)); + match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) v0)); + match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) v0)); + match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) v0)); + format %{ "vmla_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmacc_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector mls // dst_src1 = dst_src1 - src2 * src3 @@ -833,6 +1108,23 @@ instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} +// vector mls - predicated + +instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ + match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) v0)); + match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) v0)); + match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) v0)); + match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) v0)); + format %{ "vmls_masked $dst_src1, $src2, $src3, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($src3$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector mul instruct vmulB(vReg dst, vReg src1, vReg src2) %{ @@ -964,6 +1256,22 @@ instruct vnegL(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} +// vector neg - predicated + +instruct vneg_masked(vReg dst_src, vRegMask_V0 v0) %{ + match(Set dst_src (NegVI dst_src v0)); + match(Set dst_src (NegVL dst_src v0)); + ins_cost(VEC_COST); + format %{ "vneg_masked $dst_src, $dst_src, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, 
Matcher::vector_length(this)); + __ vneg_v(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector fneg instruct vnegF(vReg dst, vReg src) %{ @@ -988,6 +1296,22 @@ instruct vnegD(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} +// vector fneg - predicated + +instruct vfneg_masked(vReg dst_src, vRegMask_V0 v0) %{ + match(Set dst_src (NegVF dst_src v0)); + match(Set dst_src (NegVD dst_src v0)); + ins_cost(VEC_COST); + format %{ "vfneg_masked $dst_src, $dst_src, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vfneg_v(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector and reduction instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ @@ -1002,9 +1326,9 @@ instruct reduce_andI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1019,9 +1343,45 @@ instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), 
as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); + %} + ins_pipe(pipe_slow); +%} + +// vector and reduction - predicated + +instruct reduce_andI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (AndReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_andI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (AndReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_andL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -1040,9 +1400,9 @@ instruct reduce_orI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ 
reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1057,9 +1417,45 @@ instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); + %} + ins_pipe(pipe_slow); +%} + +// vector or reduction - predicated + +instruct reduce_orI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (OrReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_orI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (OrReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_orL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = 
Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -1078,9 +1474,9 @@ instruct reduce_xorI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1095,9 +1491,45 @@ instruct reduce_xorL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); + %} + ins_pipe(pipe_slow); +%} + +// vector xor reduction - predicated + +instruct reduce_xorI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (XorReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_xorI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + 
BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_xorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (XorReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_xorL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -1116,9 +1548,9 @@ instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1133,9 +1565,9 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ "vmv.x.s $dst, $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + 
as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1174,6 +1606,72 @@ instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ ins_pipe(pipe_slow); %} +// vector add reduction - predicated + +instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (AddReductionVI (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_addI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (AddReductionVL (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_addL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addF_masked(fRegF src1_dst, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + match(Set src1_dst (AddReductionVF (Binary src1_dst src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_addF_masked $src1_dst, $src2, $v0\t# KILL 
$tmp" %} + ins_encode %{ + __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2)); + __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); + __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg), Assembler::v0_t); + __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addD_masked(fRegD src1_dst, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + match(Set src1_dst (AddReductionVD (Binary src1_dst src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "reduce_addD_masked $src1_dst, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2)); + __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); + __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), + as_VectorRegister($tmp$$reg), Assembler::v0_t); + __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + // vector integer max reduction instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ @@ -1186,9 +1684,9 @@ instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ format %{ "vreduce_maxI $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1201,9 +1699,45 @@ instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ format %{ "vreduce_maxL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = 
Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); + %} + ins_pipe(pipe_slow); +%} + +// vector integer max reduction - predicated + +instruct vreduce_maxI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (MaxReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vreduce_maxI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vreduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (MaxReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vreduce_maxL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -1220,9 +1754,9 @@ instruct vreduce_minI(iRegINoSp dst, 
iRegIorL2I src1, vReg src2, vReg tmp) %{ format %{ "vreduce_minI $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); %} ins_pipe(pipe_slow); %} @@ -1235,9 +1769,45 @@ instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ format %{ "vreduce_minL $dst, $src1, $src2\t# KILL $tmp" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this, $src2); - __ reduce_integral_v($dst$$Register, as_VectorRegister($tmp$$reg), - $src1$$Register, as_VectorRegister($src2$$reg), bt, - this->ideal_Opcode(), Matcher::vector_length(this, $src2)); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2)); + %} + ins_pipe(pipe_slow); +%} + +// vector integer min reduction - predicated + +instruct vreduce_minI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE || + Matcher::vector_element_basic_type(n->in(2)) == T_SHORT || + Matcher::vector_element_basic_type(n->in(2)) == T_INT); + match(Set dst (MinReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vreduce_minI_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + 
Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vreduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG); + match(Set dst (MinReductionV (Binary src1 src2) v0)); + effect(TEMP tmp); + ins_cost(VEC_COST); + format %{ "vreduce_minL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src2); + __ reduce_integral_v($dst$$Register, $src1$$Register, + as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg), + this->ideal_Opcode(), bt, Matcher::vector_length(this, $src2), + Assembler::v0_t); %} ins_pipe(pipe_slow); %} @@ -1274,6 +1844,40 @@ instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ins_pipe(pipe_slow); %} +// vector float max reduction - predicated + +instruct vreduce_maxF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); + match(Set dst (MaxReductionV (Binary src1 src2) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vreduce_maxF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_fp_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, false /* is_min */, + Matcher::vector_length(this, $src2), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vreduce_maxD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); + match(Set dst (MaxReductionV (Binary src1 src2) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vreduce_maxD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + __ 
reduce_minmax_fp_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, false /* is_min */, + Matcher::vector_length(this, $src2), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector float min reduction instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ @@ -1306,6 +1910,40 @@ instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ins_pipe(pipe_slow); %} +// vector float min reduction - predicated + +instruct vreduce_minF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT); + match(Set dst (MinReductionV (Binary src1 src2) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vreduce_minF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_fp_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + false /* is_double */, true /* is_min */, + Matcher::vector_length(this, $src2), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vreduce_minD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{ + predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE); + match(Set dst (MinReductionV (Binary src1 src2) v0)); + ins_cost(VEC_COST); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vreduce_minD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + __ reduce_minmax_fp_v($dst$$FloatRegister, + $src1$$FloatRegister, as_VectorRegister($src2$$reg), + as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), + true /* is_double */, true /* is_min */, + Matcher::vector_length(this, $src2), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector replicate instruct 
replicateB(vReg dst, iRegIorL2I src) %{ @@ -1875,6 +2513,73 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} +instruct vasrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVB (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vasrB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + if (con >= BitsPerByte) con = BitsPerByte - 1; + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + __ vsra_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVS (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vasrS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + if (con >= BitsPerShort) con = BitsPerShort - 1; + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + __ vsra_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (RShiftVI (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vasrI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); + __ vsra_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vasrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); + match(Set dst_src (RShiftVL 
(Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vasrL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); + __ vsra_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ match(Set dst (URShiftVB src (RShiftCntV shift))); ins_cost(VEC_COST); @@ -1954,6 +2659,81 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} +instruct vlsrB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVB (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlsrB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + if (con >= BitsPerByte) { + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + return; + } + __ vsrl_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVS (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlsrS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + if (con >= BitsPerShort) { + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + return; + } + __ vsrl_vi(as_VectorRegister($dst_src$$reg), 
as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (URShiftVI (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlsrI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); + __ vsrl_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); + match(Set dst_src (URShiftVL (Binary dst_src (RShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlsrL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + if (con == 0) { + return; + } + __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); + __ vsrl_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ match(Set dst (LShiftVB src (LShiftCntV shift))); ins_cost(VEC_COST); @@ -2013,6 +2793,69 @@ instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ins_pipe(pipe_slow); %} +instruct vlslB_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVB (Binary dst_src (LShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlslB_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli_helper(T_BYTE, Matcher::vector_length(this)); + if (con >= BitsPerByte) { + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + return; + } + __ 
vsll_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslS_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVS (Binary dst_src (LShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlslS_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli_helper(T_SHORT, Matcher::vector_length(this)); + if (con >= BitsPerShort) { + __ vxor_vv(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + as_VectorRegister($dst_src$$reg), Assembler::v0_t); + return; + } + __ vsll_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslI_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + match(Set dst_src (LShiftVI (Binary dst_src (LShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlslI_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli_helper(T_INT, Matcher::vector_length(this)); + __ vsll_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslL_imm_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{ + predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32); + match(Set dst_src (LShiftVL (Binary dst_src (LShiftCntV shift)) v0)); + ins_cost(VEC_COST); + format %{ "vlslL_imm_masked $dst_src, $dst_src, $shift, $v0" %} + ins_encode %{ + uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli_helper(T_LONG, Matcher::vector_length(this)); + __ vsll_vi(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), con, + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector shift count instruct vshiftcnt(vReg dst, iRegIorL2I cnt) %{ @@ -2051,6 +2894,32 @@ instruct vsqrtD(vReg dst, vReg 
src) %{ ins_pipe(pipe_slow); %} +// vector sqrt - predicated + +instruct vsqrtF_masked(vReg dst_src, vRegMask_V0 v0) %{ + match(Set dst_src (SqrtVF dst_src v0)); + ins_cost(VEC_COST); + format %{ "vsqrtF_masked $dst_src, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); + __ vfsqrt_v(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +instruct vsqrtD_masked(vReg dst_src, vRegMask_V0 v0) %{ + match(Set dst_src (SqrtVD dst_src v0)); + ins_cost(VEC_COST); + format %{ "vsqrtD_masked $dst_src, $v0" %} + ins_encode %{ + __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); + __ vfsqrt_v(as_VectorRegister($dst_src$$reg), as_VectorRegister($dst_src$$reg), + Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + // vector sub instruct vsubB(vReg dst, vReg src1, vReg src2) %{ @@ -2440,6 +3309,8 @@ instruct vmask_gen_I(vRegMask dst, iRegI src) %{ ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmclr_m(as_VectorRegister($dst$$reg)); __ vsetvli(t0, $src$$Register, sew); __ vmset_m(as_VectorRegister($dst$$reg)); %} @@ -2452,6 +3323,8 @@ instruct vmask_gen_L(vRegMask dst, iRegL src) %{ ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmclr_m(as_VectorRegister($dst$$reg)); __ vsetvli(t0, $src$$Register, sew); __ vmset_m(as_VectorRegister($dst$$reg)); %} @@ -2463,6 +3336,10 @@ instruct vmask_gen_imm(vRegMask dst, immL con) %{ format %{ "vmask_gen_imm $dst, $con" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); + if ($con$$constant != Matcher::vector_length(this)) { + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmclr_m(as_VectorRegister($dst$$reg)); + } __ 
vsetvli_helper(bt, (uint)($con$$constant)); __ vmset_m(as_VectorRegister($dst$$reg)); %} @@ -2486,7 +3363,7 @@ instruct vmaskAll_immI(vRegMask dst, immI src) %{ ins_pipe(pipe_slow); %} -instruct vmaskAllI(vRegMask dst, iRegI src) %{ +instruct vmaskAllI(vRegMask dst, iRegIorL2I src) %{ match(Set dst (MaskAll src)); format %{ "vmaskAllI $dst, $src" %} ins_encode %{ @@ -2878,7 +3755,7 @@ instruct vcvtDtoF(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} -// vector reinterpret +// ------------------------------ Vector reinterpret --------------------------- instruct reinterpret(vReg dst_src) %{ predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); @@ -2951,7 +3828,455 @@ instruct rearrange(vReg dst, vReg src, vReg shuffle) %{ ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); - __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($shuffle$$reg)); + __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shuffle$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct rearrange_masked(vReg dst, vReg src, vReg shuffle, vRegMask_V0 v0) %{ + match(Set dst (VectorRearrange (Binary src shuffle) v0)); + effect(TEMP_DEF dst); + format %{ "rearrange_masked $dst, $src, $shuffle, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), + as_VectorRegister($dst$$reg)); + __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($shuffle$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector extract --------------------------------- + +instruct extract(iRegINoSp dst, vReg src, immI idx, vReg tmp) +%{ + match(Set dst (ExtractB src idx)); + match(Set dst (ExtractS src 
idx)); + match(Set dst (ExtractI src idx)); + effect(TEMP tmp); + format %{ "extract $dst, $src, $idx\t# KILL $tmp" %} + ins_encode %{ + __ extract_v($dst$$Register, as_VectorRegister($src$$reg), + Matcher::vector_element_basic_type(this, $src), (int)($idx$$constant), + as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct extractL(iRegLNoSp dst, vReg src, immI idx, vReg tmp) +%{ + match(Set dst (ExtractL src idx)); + effect(TEMP tmp); + format %{ "extractL $dst, $src, $idx\t# KILL $tmp" %} + ins_encode %{ + __ extract_v($dst$$Register, as_VectorRegister($src$$reg), T_LONG, + (int)($idx$$constant), as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + + +instruct extractF(fRegF dst, vReg src, immI idx, vReg tmp) +%{ + match(Set dst (ExtractF src idx)); + effect(TEMP tmp); + format %{ "extractF $dst, $src, $idx\t# KILL $tmp" %} + ins_encode %{ + __ extract_fp_v($dst$$FloatRegister, as_VectorRegister($src$$reg), T_FLOAT, + (int)($idx$$constant), as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct extractD(fRegD dst, vReg src, immI idx, vReg tmp) +%{ + match(Set dst (ExtractD src idx)); + effect(TEMP tmp); + format %{ "extractD $dst, $src, $idx\t# KILL $tmp" %} + ins_encode %{ + __ extract_fp_v($dst$$FloatRegister, as_VectorRegister($src$$reg), T_DOUBLE, + (int)($idx$$constant), as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Compress/Expand Operations ------------------- + +instruct mcompress(vRegMask dst, vRegMask src, iRegLNoSp tmp) %{ + match(Set dst (CompressM src)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "mcompress $dst, $src\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmclr_m(as_VectorRegister($dst$$reg)); + __ vcpop_m($tmp$$Register, as_VectorRegister($src$$reg)); + __ vsetvli(t0, 
$tmp$$Register, sew); + __ vmset_m(as_VectorRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcompress(vReg dst, vReg src, vRegMask_V0 v0) %{ + match(Set dst (CompressV src v0)); + effect(TEMP_DEF dst); + format %{ "vcompress $dst, $src, $v0" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), + as_VectorRegister($dst$$reg)); + __ vcompress_vm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($v0$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vexpand(vReg dst, vReg src, vRegMask_V0 v0, vReg tmp) %{ + match(Set dst (ExpandV src v0)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "vexpand $dst, $src, $v0\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ viota_m(as_VectorRegister($tmp$$reg), as_VectorRegister($v0$$reg)); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), + as_VectorRegister($dst$$reg)); + __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($tmp$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Load Gather --------------------------- + +instruct gather_load(vReg dst, indirect mem, vReg idx) %{ + predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 4 || + type2aelembytes(Matcher::vector_element_basic_type(n)) == 8); + match(Set dst (LoadVectorGather mem idx)); + effect(TEMP_DEF dst); + format %{ "gather_load $dst, $mem, $idx" %} + ins_encode %{ + __ vmv1r_v(as_VectorRegister($dst$$reg), as_VectorRegister($idx$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ 
vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), (int)sew); + __ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base), + as_VectorRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct gather_load_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, vReg tmp) %{ + predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 4 || + type2aelembytes(Matcher::vector_element_basic_type(n)) == 8); + match(Set dst (LoadVectorGatherMasked mem (Binary idx v0))); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "gather_load_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %} + ins_encode %{ + __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew); + __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($dst$$reg), + as_VectorRegister($dst$$reg)); + __ vluxei32_v(as_VectorRegister($dst$$reg), as_Register($mem$$base), + as_VectorRegister($tmp$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector Store Scatter ------------------------- + +instruct scatter_store(indirect mem, vReg src, vReg idx, vReg tmp) %{ + predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 4 || + type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 8); + match(Set mem (StoreVectorScatter mem (Binary src idx))); + effect(TEMP tmp); + format %{ "scatter_store $mem, $idx, $src\t# KILL $tmp" %} + ins_encode %{ + __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); + __ vsll_vi(as_VectorRegister($tmp$$reg), 
as_VectorRegister($tmp$$reg), (int)sew); + __ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base), + as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct scatter_store_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0, vReg tmp) %{ + predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 4 || + type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 8); + match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx v0)))); + effect(TEMP tmp); + format %{ "scatter_store_masked $mem, $idx, $src, $v0\t# KILL $tmp" %} + ins_encode %{ + __ vmv1r_v(as_VectorRegister($tmp$$reg), as_VectorRegister($idx$$reg)); + BasicType bt = Matcher::vector_element_basic_type(this, $src); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); + __ vsll_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($tmp$$reg), (int)sew); + __ vsuxei32_v(as_VectorRegister($src$$reg), as_Register($mem$$base), + as_VectorRegister($tmp$$reg), Assembler::v0_t); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Populate Index to a Vector ------------------- + +instruct populateindex(vReg dst, iRegIorL2I src1, iRegIorL2I src2, vReg tmp) %{ + match(Set dst (PopulateIndex src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "populateindex $dst, $src1, $src2\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src1$$reg)); + __ vid_v(as_VectorRegister($tmp$$reg)); + __ vmacc_vx(as_VectorRegister($dst$$reg), as_Register($src2$$reg), as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector insert -------------------------------- + +// BYTE, SHORT, INT + +instruct insertI_index_lt32(vReg 
dst, vReg src, iRegIorL2I val, immI idx, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() < 32 && + (Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP v0); + format %{ "insertI_index_lt32 $dst, $src, $val, $idx" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vadd_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), -16); + __ vmseq_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), (int)($idx$$constant) - 16); + __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insertI_index(vReg dst, vReg src, iRegIorL2I val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() >= 32 && + (Matcher::vector_element_basic_type(n) == T_BYTE || + Matcher::vector_element_basic_type(n) == T_SHORT || + Matcher::vector_element_basic_type(n) == T_INT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP tmp, TEMP v0); + format %{ "insertI_index $dst, $src, $val, $idx\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vmv_v_x(as_VectorRegister($tmp$$reg), $idx$$Register); + __ vmseq_vv(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), as_VectorRegister($tmp$$reg)); + __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +// LONG + +instruct insertL_index_lt32(vReg dst, vReg src, iRegL val, immI idx, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() < 32 && + (Matcher::vector_element_basic_type(n) == T_LONG)); + match(Set dst 
(VectorInsert (Binary src val) idx)); + effect(TEMP v0); + format %{ "insertL_index_lt32 $dst, $src, $val, $idx" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vadd_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), -16); + __ vmseq_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), (int)($idx$$constant) - 16); + __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insertL_index(vReg dst, vReg src, iRegL val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() >= 32 && + (Matcher::vector_element_basic_type(n) == T_LONG)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP tmp, TEMP v0); + format %{ "insertL_index $dst, $src, $val, $idx\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + __ vsetvli_helper(bt, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vmv_v_x(as_VectorRegister($tmp$$reg), $idx$$Register); + __ vmseq_vv(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), as_VectorRegister($tmp$$reg)); + __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +// FLOAT + +instruct insertF_index_lt32(vReg dst, vReg src, fRegF val, immI idx, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() < 32 && + (Matcher::vector_element_basic_type(n) == T_FLOAT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP v0); + format %{ "insertF_index_lt32 $dst, $src, $val, $idx" %} + ins_encode %{ + __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vadd_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), -16); + __ vmseq_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), 
(int)($idx$$constant) - 16); + __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct insertF_index(vReg dst, vReg src, fRegF val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() >= 32 && + (Matcher::vector_element_basic_type(n) == T_FLOAT)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP tmp, TEMP v0); + format %{ "insertF_index $dst, $src, $val, $idx\t# KILL $tmp" %} + ins_encode %{ + __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vmv_v_x(as_VectorRegister($tmp$$reg), $idx$$Register); + __ vmseq_vv(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), as_VectorRegister($tmp$$reg)); + __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +// DOUBLE + +instruct insertD_index_lt32(vReg dst, vReg src, fRegD val, immI idx, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() < 32 && + (Matcher::vector_element_basic_type(n) == T_DOUBLE)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP v0); + format %{ "insertD_index_lt32 $dst, $src, $val, $idx" %} + ins_encode %{ + __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vadd_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), -16); + __ vmseq_vi(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), (int)($idx$$constant) - 16); + __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct insertD_index(vReg dst, vReg src, fRegD val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{ + predicate(n->in(2)->get_int() >= 32 && + (Matcher::vector_element_basic_type(n) == T_DOUBLE)); + match(Set dst (VectorInsert (Binary src val) idx)); + effect(TEMP tmp, TEMP v0); + format %{ "insertD_index $dst, $src, 
$val, $idx\t# KILL $tmp" %} + ins_encode %{ + __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); + __ vid_v(as_VectorRegister($v0$$reg)); + __ vmv_v_x(as_VectorRegister($tmp$$reg), $idx$$Register); + __ vmseq_vv(as_VectorRegister($v0$$reg), as_VectorRegister($v0$$reg), as_VectorRegister($tmp$$reg)); + __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector mask reductions ----------------------- + +// true count + +instruct vmask_truecount(iRegINoSp dst, vRegMask src) %{ + match(Set dst (VectorMaskTrueCount src)); + format %{ "vmask_truecount $dst, $src" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); + __ vcpop_m($dst$$Register, as_VectorRegister($src$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// first true + +// Return the index of the first mask lane that is set, or vector length if none of +them are set. + +instruct vmask_firsttrue(iRegINoSp dst, vRegMask src, vRegMask tmp) %{ + match(Set dst (VectorMaskFirstTrue src)); + effect(TEMP tmp); + format %{ "vmask_firsttrue $dst, $src\t# KILL $tmp" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this, $src); + __ vsetvli_helper(bt, Matcher::vector_length(this, $src)); + __ vmsbf_m(as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + __ vcpop_m($dst$$Register, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); +%} + +// last true + +// Return the index of the last mask lane that is set, or -1 if none of +them are set. 
+ +instruct vmask_lasttrue(iRegINoSp dst, vRegMask src) %{ + match(Set dst (VectorMaskLastTrue src)); + format %{ "vmask_lasttrue $dst, $src" %} + ins_encode %{ + uint vector_length = Matcher::vector_length(this, $src); + assert(UseZbb && vector_length <= XLEN, "precondition"); + __ vsetvli_helper(T_LONG, 1); + __ vmv_x_s($dst$$Register, as_VectorRegister($src$$reg)); + if (XLEN != vector_length) { + __ slli($dst$$Register, $dst$$Register, XLEN - vector_length); + __ srli($dst$$Register, $dst$$Register, XLEN - vector_length); + } + __ clz($dst$$Register, $dst$$Register); + __ mv(t0, XLEN - 1); + __ sub($dst$$Register, t0, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +// tolong + +instruct vmask_tolong(iRegLNoSp dst, vRegMask src) %{ + match(Set dst (VectorMaskToLong src)); + format %{ "vmask_tolong $dst, $src" %} + ins_encode %{ + uint vector_length = Matcher::vector_length(this, $src); + assert(vector_length <= XLEN, "precondition"); + __ vsetvli_helper(T_LONG, 1); + __ vmv_x_s($dst$$Register, as_VectorRegister($src$$reg)); + if (XLEN != vector_length) { + __ slli($dst$$Register, $dst$$Register, XLEN - vector_length); + __ srli($dst$$Register, $dst$$Register, XLEN - vector_length); + } + %} + ins_pipe(pipe_slow); +%} + +// fromlong + +instruct vmask_fromlong(vRegMask dst, iRegL src) %{ + match(Set dst (VectorLongToMask src)); + format %{ "vmask_fromlong $dst, $src" %} + ins_encode %{ + assert(Matcher::vector_length(this) <= XLEN, "precondition"); + __ vsetvli_helper(T_LONG, 1); + __ vmv_s_x(as_VectorRegister($dst$$reg), $src$$Register); %} ins_pipe(pipe_slow); %} \ No newline at end of file