From 134b63f0e8c4093f7ad0a528d6996898ab881d5c Mon Sep 17 00:00:00 2001
From: Yuri Gaevsky
Date: Fri, 3 Oct 2025 09:44:56 +0000
Subject: [PATCH] 8322174: RISC-V: C2 VectorizedHashCode RVV Version

Reviewed-by: fyang, rehn
---
 .../cpu/riscv/c2_MacroAssembler_riscv.cpp     | 147 ++++++++++++++++--
 .../cpu/riscv/c2_MacroAssembler_riscv.hpp     |   8 +-
 src/hotspot/cpu/riscv/riscv.ad                |   1 +
 src/hotspot/cpu/riscv/riscv_v.ad              |  22 +++
 .../cpu/riscv/stubDeclarations_riscv.hpp      |   3 +
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp |  22 +++
 6 files changed, 185 insertions(+), 18 deletions(-)

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 1bdb7bc2f7c..154b62db47f 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1687,6 +1687,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
                                         Register tmp4, Register tmp5, Register tmp6,
                                         BasicType eltype)
 {
+  assert(!UseRVV, "sanity"); // scalar variant: its riscv.ad rule carries predicate(!UseRVV)
   assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
 
   const int elsize = arrays_hashcode_elsize(eltype);
@@ -1759,29 +1760,143 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
   BLOCK_COMMENT("} // arrays_hashcode");
 }
 
+void C2_MacroAssembler::arrays_hashcode_v(Register ary, Register cnt, Register result,
+                                          Register tmp1, Register tmp2, Register tmp3,
+                                          BasicType eltype)
+{ // RVV variant of arrays_hashcode(): folds 'cnt' elements at 'ary' into 'result'; 'ary' and 'cnt' are modified.
+  assert(UseRVV, "sanity"); // the matching rule (varrays_hashcode in riscv_v.ad) has predicate(UseRVV)
+  assert(StubRoutines::riscv::arrays_hashcode_powers_of_31() != nullptr, "sanity"); // table comes from generate_arrays_hashcode_powers_of_31()
+  assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, t0, t1);
+
+  // The MaxVectorSize should have been set by detecting RVV max vector register
+  // size when checking UseRVV (i.e. MaxVectorSize == VM_Version::_initial_vector_length).
+  // Let's use T_INT as all hashCode calculations eventually deal with ints.
+  const int lmul = 2; // must agree with Assembler::m2 below and with generate_arrays_hashcode_powers_of_31()
+  const int stride = MaxVectorSize / sizeof(jint) * lmul; // jint lanes per LMUL=2 group; the masks below assume a power of two
+
+  const int elsize_bytes = arrays_hashcode_elsize(eltype);
+  const int elsize_shift = exact_log2(elsize_bytes); // shift amount handed to shadd() when scaling element counts
+
+  switch (eltype) {
+    case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode_v(unsigned byte) {"); break;
+    case T_CHAR:    BLOCK_COMMENT("arrays_hashcode_v(char) {");          break;
+    case T_BYTE:    BLOCK_COMMENT("arrays_hashcode_v(byte) {");          break;
+    case T_SHORT:   BLOCK_COMMENT("arrays_hashcode_v(short) {");         break;
+    case T_INT:     BLOCK_COMMENT("arrays_hashcode_v(int) {");           break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  const Register pow31_highest = tmp1; // 31^stride, the first entry of the powers table
+  const Register ary_end       = tmp2; // end address for the scalar tail loop
+  const Register consumed      = tmp3; // result of vsetvli: elements handled per vector step
+
+  const VectorRegister v_sum    = v2; // per-lane partial sums; v2..v9 are TEMPs of varrays_hashcode
+  const VectorRegister v_src    = v4; // current block of elements, widened to 32 bits
+  const VectorRegister v_coeffs = v6; // 31^(stride - 1) ... 31^0, loaded once before the loop
+  const VectorRegister v_tmp    = v8; // narrow-load scratch, later the reduction seed
+
+  const address adr_pows31 = StubRoutines::riscv::arrays_hashcode_powers_of_31()
+                           + sizeof(jint); // skip 31^stride so the vector load starts at 31^(stride - 1)
+  Label VEC_LOOP, DONE, SCALAR_TAIL, SCALAR_TAIL_LOOP;
+
+  // NB: at this point (a) 'result' already has some value,
+  // (b) 'cnt' is not 0 or 1, see java code for details.
+
+  andi(t0, cnt, ~(stride - 1)); // non-zero iff cnt >= stride (non-negative cnt, power-of-two stride)
+  beqz(t0, SCALAR_TAIL);        // fewer than 'stride' elements: scalar loop only
+
+  la(t1, ExternalAddress(adr_pows31));
+  lw(pow31_highest, Address(t1, -1 * sizeof(jint))); // the entry just before adr_pows31, i.e. 31^stride
+
+  vsetvli(consumed, cnt, Assembler::e32, Assembler::m2); // SEW=32, LMUL=2; issued once, not repeated inside the loop
+  vle32_v(v_coeffs, t1);         // 31^^(stride - 1) ... 31^^0
+  vmv_v_x(v_sum, x0);            // clear the per-lane accumulators
+
+  bind(VEC_LOOP);
+  arrays_hashcode_elload_v(v_src, v_tmp, ary, eltype); // next block of elements as 32-bit lanes
+  vmul_vv(v_src, v_src, v_coeffs);                     // weight each element by its power of 31
+  vmadd_vx(v_sum, pow31_highest, v_src);               // v_sum = v_sum * 31^stride + v_src (vmadd.vx per the RVV spec)
+  mulw(result, result, pow31_highest);                 // the incoming hash is scaled by 31^stride once per block
+  shadd(ary, consumed, ary, t0, elsize_shift);         // presumably ary += consumed << elsize_shift, with t0 as scratch
+  subw(cnt, cnt, consumed);
+  andi(t1, cnt, ~(stride - 1));                        // same "at least stride left?" test as on entry
+  bnez(t1, VEC_LOOP);
+
+  vmv_s_x(v_tmp, x0);              // element 0 of v_tmp = 0: seed for the reduction
+  vredsum_vs(v_sum, v_sum, v_tmp); // element 0 of v_sum = sum of all lanes (vredsum.vs per the RVV spec)
+  vmv_x_s(t0, v_sum);
+  addw(result, result, t0);        // fold the vector part into the scalar hash
+  beqz(cnt, DONE);                 // nothing left for the scalar tail
+
+  bind(SCALAR_TAIL);
+  shadd(ary_end, cnt, ary, t0, elsize_shift); // presumably ary_end = ary + (cnt << elsize_shift)
+
+  bind(SCALAR_TAIL_LOOP);
+  arrays_hashcode_elload(t0, Address(ary), eltype);
+  slli(t1, result, 5);           // optimize 31 * result
+  subw(result, t1, result);      // with result<<5 - result
+  addw(result, result, t0);
+  addi(ary, ary, elsize_bytes);
+  bne(ary, ary_end, SCALAR_TAIL_LOOP); // exit is an equality test, so the tail needs cnt != 0 (see NB above)
+
+  bind(DONE);
+  BLOCK_COMMENT("} // arrays_hashcode_v");
+}
+
 int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
   switch (eltype) {
-    case T_BOOLEAN: return sizeof(jboolean);
-    case T_BYTE:    return sizeof(jbyte);
-    case T_SHORT:   return sizeof(jshort);
-    case T_CHAR:    return sizeof(jchar);
-    case T_INT:     return sizeof(jint);
-    default:
-      ShouldNotReachHere();
-      return -1;
+  case T_BOOLEAN: return sizeof(jboolean);
+  case T_BYTE:    return sizeof(jbyte);
+  case T_SHORT:   return sizeof(jshort);
+  case T_CHAR:    return sizeof(jchar);
+  case T_INT:     return sizeof(jint);
+  default:
+    ShouldNotReachHere();
+    return -1;
   }
 }
 
 void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
   switch (eltype) {
-    // T_BOOLEAN used as surrogate for unsigned byte
-    case T_BOOLEAN: lbu(dst, src);   break;
-    case T_BYTE:     lb(dst, src);   break;
-    case T_SHORT:    lh(dst, src);   break;
-    case T_CHAR:    lhu(dst, src);   break;
-    case T_INT:      lw(dst, src);   break;
-    default:
-      ShouldNotReachHere();
+  // T_BOOLEAN used as surrogate for unsigned byte
+  case T_BOOLEAN: lbu(dst, src);   break;
+  case T_BYTE:     lb(dst, src);   break;
+  case T_SHORT:    lh(dst, src);   break;
+  case T_CHAR:    lhu(dst, src);   break;
+  case T_INT:      lw(dst, src);   break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+void C2_MacroAssembler::arrays_hashcode_elload_v(VectorRegister vdst,
+                                                 VectorRegister vtmp,
+                                                 Register src,
+                                                 BasicType eltype) { // vector counterpart of arrays_hashcode_elload()
+  assert_different_registers(vdst, vtmp); // vtmp holds the narrow data that is then widened into vdst
+  switch (eltype) {
+  case T_BOOLEAN: // unsigned byte, as in arrays_hashcode_elload()
+    vle8_v(vtmp, src);
+    vzext_vf4(vdst, vtmp); // zero-extend by 4x (RVV vzext.vf4); the caller has selected e32
+    break;
+  case T_BYTE:
+    vle8_v(vtmp, src);
+    vsext_vf4(vdst, vtmp); // sign-extend by 4x (RVV vsext.vf4)
+    break;
+  case T_CHAR:
+    vle16_v(vtmp, src);
+    vzext_vf2(vdst, vtmp); // zero-extend by 2x (RVV vzext.vf2)
+    break;
+  case T_SHORT:
+    vle16_v(vtmp, src);
+    vsext_vf2(vdst, vtmp); // sign-extend by 2x (RVV vsext.vf2)
+    break;
+  case T_INT:
+    vle32_v(vdst, src); // already 32-bit: loaded straight into vdst, vtmp unused
+    break;
+  default:
+    ShouldNotReachHere();
   }
 }
 
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 309ef8d9d5e..2d5339dc153 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -92,11 +92,15 @@
                        Register tmp3, Register tmp4,
                        Register tmp5, Register tmp6,
                        BasicType eltype);
-
-  // helper function for arrays_hashcode
   int arrays_hashcode_elsize(BasicType eltype);
   void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
 
+  void arrays_hashcode_v(Register ary, Register cnt, Register result,
+                         Register tmp1, Register tmp2, Register tmp3,
+                         BasicType eltype); // RVV variant of arrays_hashcode; asserts UseRVV
+  void arrays_hashcode_elload_v(VectorRegister vdst, VectorRegister vtmp,
+                                Register src, BasicType eltype); // vector load helper for arrays_hashcode_v
+
   void string_equals(Register r1, Register r2,
                      Register result, Register cnt1);
 
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 1f14c499c34..009acd628a0 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -10991,6 +10991,7 @@ instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI ba
                          iRegLNoSp tmp3, iRegLNoSp tmp4,
                          iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
 %{
+  predicate(!UseRVV); // with RVV the varrays_hashcode rule in riscv_v.ad matches the same node
   match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
          USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index f2845ee2a6c..fe323474d60 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -4080,6 +4080,28 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
   ins_pipe(pipe_class_memory);
 %}
 
+// fast ArraysSupport.vectorizedHashCode (RVV variant; the scalar rule is arrays_hashcode in riscv.ad)
+instruct varrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
+                          vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
+                          vReg_V6 v6, vReg_V7 v7, vReg_V8 v8, vReg_V9 v9,
+                          iRegLNoSp tmp1, iRegLNoSp tmp2, iRegLNoSp tmp3,
+                          rFlagsReg cr)
+%{
+  predicate(UseRVV); // complements predicate(!UseRVV) on the scalar rule
+  match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
+  effect(USE_KILL ary, USE_KILL cnt, USE basic_type,
+         TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, // arrays_hashcode_v names v2/v4/v6/v8 with LMUL=2; presumably each is a two-register group
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+  format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
+  ins_encode %{
+    __ arrays_hashcode_v($ary$$Register, $cnt$$Register, $result$$Register,
+                         $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                         (BasicType)$basic_type$$constant); // the vector registers are hard-coded in arrays_hashcode_v, not passed
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 instruct vstring_compareU_128b(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                                iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                                vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
diff --git a/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp b/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
index fe7f52884fa..f977d759d20 100644
--- a/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
+++ b/src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
@@ -73,6 +73,9 @@
   do_stub(compiler, string_indexof_linear_ul)                           \
   do_arch_entry(riscv, compiler, string_indexof_linear_ul,              \
                 string_indexof_linear_ul, string_indexof_linear_ul)     \
+  do_stub(compiler, arrays_hashcode_powers_of_31)                       \
+  do_arch_entry(riscv, compiler, arrays_hashcode_powers_of_31,          \
+                arrays_hashcode_powers_of_31, arrays_hashcode_powers_of_31) \
 
 
 #define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub,                            \
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 88961ccd5a4..ec268d9bb65 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -6624,6 +6624,24 @@ static const int64_t right_3_bits = right_n_bits(3);
     return start;
   }
 
+  address generate_arrays_hashcode_powers_of_31() { // emits a data table, no instructions: 31^stride, ..., 31^1, 31^0 as 32-bit values
+    assert(UseRVV, "sanity");
+    const int lmul = 2; // keep in sync with C2_MacroAssembler::arrays_hashcode_v
+    const int stride = MaxVectorSize / sizeof(jint) * lmul; // same formula as in arrays_hashcode_v
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "arrays_hashcode_powers_of_31");
+    address start = __ pc();
+    for (int i = stride; i >= 0; i--) { // i is the exponent, highest first: stride + 1 entries in total
+        jint power_of_31 = 1;
+        for (int j = i; j > 0; j--) { // recomputed from 1 for every entry
+          power_of_31 = java_multiply(power_of_31, 31); // presumably wraps like a Java int multiply - confirm against java_multiply()
+        }
+        __ emit_int32(power_of_31);
+    }
+
+    return start; // address of the first entry, 31^stride
+  }
+
 #endif // COMPILER2
 
   /**
@@ -6818,6 +6836,10 @@ static const int64_t right_3_bits = right_n_bits(3);
       StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
     }
 
+    if (UseVectorizedHashCodeIntrinsic && UseRVV) { // the table is read by C2_MacroAssembler::arrays_hashcode_v
+      StubRoutines::riscv::_arrays_hashcode_powers_of_31 = generate_arrays_hashcode_powers_of_31();
+    }
+
     if (UseSHA256Intrinsics) {
       Sha2Generator sha2(_masm, this);
       StubRoutines::_sha256_implCompress = sha2.generate_sha256_implCompress(StubId::stubgen_sha256_implCompress_id);