8322174: RISC-V: C2 VectorizedHashCode RVV Version

Reviewed-by: fyang, rehn
This commit is contained in:
Yuri Gaevsky 2025-10-03 09:44:56 +00:00 committed by Vladimir Kempik
parent 2e783963d2
commit 134b63f0e8
6 changed files with 185 additions and 18 deletions

View File

@ -1687,6 +1687,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
Register tmp4, Register tmp5, Register tmp6,
BasicType eltype)
{
assert(!UseRVV, "sanity");
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
const int elsize = arrays_hashcode_elsize(eltype);
@ -1759,29 +1760,143 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
BLOCK_COMMENT("} // arrays_hashcode");
}
// Emits the RVV implementation of the array hashCode intrinsic.
//
// Computes, for the 'cnt' elements starting at 'ary',
//   result = result * 31^cnt + ary[0] * 31^(cnt-1) + ... + ary[cnt-1] * 31^0
// using 32-bit (jint) wrap-around arithmetic.
//
//   ary    - address of the first element; advanced past the consumed elements
//   cnt    - number of elements; decremented by the vector loop
//   result - in: initial hash value, out: final hash value
//   tmp1..tmp3 - scratch registers; t0 and t1 are clobbered as well
//   eltype - element type (T_BOOLEAN stands for unsigned byte)
//
// Vector registers v2..v9 are used as four LMUL=2 groups (v2, v4, v6, v8).
//
// The main loop handles exactly 'stride' elements per iteration:
//   v_sum  = v_sum  * 31^stride + src[i] * 31^(stride-1-i)   (lane-wise)
//   result = result * 31^stride
// After the loop the lanes of v_sum are reduced and added to 'result'; the
// remaining (< stride) elements are processed by a scalar loop.
void C2_MacroAssembler::arrays_hashcode_v(Register ary, Register cnt, Register result,
                                          Register tmp1, Register tmp2, Register tmp3,
                                          BasicType eltype)
{
  assert(UseRVV, "sanity");
  assert(StubRoutines::riscv::arrays_hashcode_powers_of_31() != nullptr, "sanity");
  assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, t0, t1);

  // The MaxVectorSize should have been set by detecting RVV max vector register
  // size when check UseRVV (i.e. MaxVectorSize == VM_Version::_initial_vector_length).
  // Let's use T_INT as all hashCode calculations eventually deal with ints.
  const int lmul = 2;
  const int stride = MaxVectorSize / sizeof(jint) * lmul;

  const int elsize_bytes = arrays_hashcode_elsize(eltype);
  const int elsize_shift = exact_log2(elsize_bytes);

  switch (eltype) {
    case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode_v(unsigned byte) {"); break;
    case T_CHAR:    BLOCK_COMMENT("arrays_hashcode_v(char) {");          break;
    case T_BYTE:    BLOCK_COMMENT("arrays_hashcode_v(byte) {");          break;
    case T_SHORT:   BLOCK_COMMENT("arrays_hashcode_v(short) {");         break;
    case T_INT:     BLOCK_COMMENT("arrays_hashcode_v(int) {");           break;
    default:
      ShouldNotReachHere();
  }

  const Register pow31_highest = tmp1;
  const Register ary_end       = tmp2;
  const Register consumed      = tmp3;

  const VectorRegister v_sum    = v2;
  const VectorRegister v_src    = v4;
  const VectorRegister v_coeffs = v6;
  const VectorRegister v_tmp    = v8;

  // The stub table holds 31^stride, 31^(stride-1), ..., 31^0. Skip the first
  // entry so that the vector load starts at 31^(stride-1); the first entry
  // (31^stride) is fetched separately at offset -sizeof(jint).
  const address adr_pows31 = StubRoutines::riscv::arrays_hashcode_powers_of_31()
                             + sizeof(jint);
  Label VEC_LOOP, DONE, SCALAR_TAIL, SCALAR_TAIL_LOOP;

  // NB: at this point (a) 'result' already has some value,
  // (b) 'cnt' is not 0 or 1, see java code for details.

  // Fewer than 'stride' elements: scalar processing only.
  andi(t0, cnt, ~(stride - 1));
  beqz(t0, SCALAR_TAIL);

  la(t1, ExternalAddress(adr_pows31));
  lw(pow31_highest, Address(t1, -1 * sizeof(jint)));

  // Request exactly 'stride' lanes. Using 'cnt' as AVL is not sufficient:
  // for VLMAX < AVL < 2 * VLMAX the RVV spec allows any vl in
  // [ceil(AVL / 2), VLMAX], and with MaxVectorSize below the hardware vector
  // length vl would exceed 'stride'. Either would break the correspondence
  // between lanes and the power table. stride <= VLMAX guarantees vl == stride.
  mv(t0, stride);
  vsetvli(consumed, t0, Assembler::e32, Assembler::m2);
  vle32_v(v_coeffs, t1); // 31^^(stride - 1) ... 31^^0
  vmv_v_x(v_sum, x0);

  bind(VEC_LOOP);
  arrays_hashcode_elload_v(v_src, v_tmp, ary, eltype);
  vmul_vv(v_src, v_src, v_coeffs);
  vmadd_vx(v_sum, pow31_highest, v_src);       // v_sum = v_sum * 31^stride + v_src
  mulw(result, result, pow31_highest);         // result *= 31^stride
  shadd(ary, consumed, ary, t0, elsize_shift); // advance by 'consumed' elements
  subw(cnt, cnt, consumed);
  andi(t1, cnt, ~(stride - 1));
  bnez(t1, VEC_LOOP);

  // Fold the per-lane partial sums into 'result'.
  vmv_s_x(v_tmp, x0);
  vredsum_vs(v_sum, v_sum, v_tmp);
  vmv_x_s(t0, v_sum);
  addw(result, result, t0);
  beqz(cnt, DONE);

  bind(SCALAR_TAIL);
  shadd(ary_end, cnt, ary, t0, elsize_shift);

  bind(SCALAR_TAIL_LOOP);
  arrays_hashcode_elload(t0, Address(ary), eltype);
  slli(t1, result, 5);           // optimize 31 * result
  subw(result, t1, result);      // with result<<5 - result
  addw(result, result, t0);
  addi(ary, ary, elsize_bytes);
  bne(ary, ary_end, SCALAR_TAIL_LOOP);

  bind(DONE);
  BLOCK_COMMENT("} // arrays_hashcode_v");
}
// Returns the size in bytes of one array element of the given type.
// Only the element types supported by the hashCode intrinsic are accepted;
// any other type hits ShouldNotReachHere().
int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
  switch (eltype) {
    case T_BOOLEAN: return sizeof(jboolean);
    case T_BYTE:    return sizeof(jbyte);
    case T_SHORT:   return sizeof(jshort);
    case T_CHAR:    return sizeof(jchar);
    case T_INT:     return sizeof(jint);
    default:
      ShouldNotReachHere();
      return -1;
  }
}
// Emits a scalar load of one array element from 'src' into 'dst', using the
// load instruction that matches the element type's width and signedness.
// Any unsupported element type hits ShouldNotReachHere().
void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
  switch (eltype) {
    // T_BOOLEAN used as surrogate for unsigned byte
    case T_BOOLEAN: lbu(dst, src);   break;
    case T_BYTE:     lb(dst, src);   break;
    case T_SHORT:    lh(dst, src);   break;
    case T_CHAR:    lhu(dst, src);   break;
    case T_INT:      lw(dst, src);   break;
    default:
      ShouldNotReachHere();
  }
}
// Emits a vector load of array elements from the address in 'src' into 'vdst'.
// 8-bit and 16-bit elements are first loaded into 'vtmp' and then widened
// into 'vdst': zero-extended for T_BOOLEAN (unsigned byte) and T_CHAR,
// sign-extended for T_BYTE and T_SHORT. T_INT elements are loaded directly
// into 'vdst' and 'vtmp' is left untouched.
void C2_MacroAssembler::arrays_hashcode_elload_v(VectorRegister vdst,
                                                 VectorRegister vtmp,
                                                 Register src,
                                                 BasicType eltype) {
  assert_different_registers(vdst, vtmp);

  const bool byte_sized  = (eltype == T_BOOLEAN) || (eltype == T_BYTE);
  const bool short_sized = (eltype == T_CHAR)    || (eltype == T_SHORT);

  if (byte_sized) {
    // 8-bit source elements, widened by a factor of four.
    vle8_v(vtmp, src);
    if (eltype == T_BOOLEAN) {
      vzext_vf4(vdst, vtmp);
    } else {
      vsext_vf4(vdst, vtmp);
    }
  } else if (short_sized) {
    // 16-bit source elements, widened by a factor of two.
    vle16_v(vtmp, src);
    if (eltype == T_CHAR) {
      vzext_vf2(vdst, vtmp);
    } else {
      vsext_vf2(vdst, vtmp);
    }
  } else if (eltype == T_INT) {
    // Already 32-bit wide: no widening step needed.
    vle32_v(vdst, src);
  } else {
    ShouldNotReachHere();
  }
}

View File

@ -92,11 +92,15 @@
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
BasicType eltype);
// Helper functions for arrays_hashcode and arrays_hashcode_v.

// Returns the size in bytes of one element of type 'eltype'.
int arrays_hashcode_elsize(BasicType eltype);
// Emits a scalar load of one element of type 'eltype' from 'src' into 'dst'
// (unsigned load for T_BOOLEAN and T_CHAR, signed load otherwise).
void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
// RVV version of arrays_hashcode (asserts UseRVV). Takes three scratch
// registers instead of six and additionally uses vector registers.
void arrays_hashcode_v(Register ary, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3,
BasicType eltype);
// Emits a vector load of elements of type 'eltype' from the address in 'src'
// into 'vdst'; 8-bit and 16-bit elements go through 'vtmp' and are zero- or
// sign-extended into 'vdst'. 'vdst' and 'vtmp' must be different registers.
void arrays_hashcode_elload_v(VectorRegister vdst, VectorRegister vtmp,
Register src, BasicType eltype);
void string_equals(Register r1, Register r2,
Register result, Register cnt1);

View File

@ -10991,6 +10991,7 @@ instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI ba
iRegLNoSp tmp3, iRegLNoSp tmp4,
iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
%{
predicate(!UseRVV);
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);

View File

@ -4080,6 +4080,28 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
ins_pipe(pipe_class_memory);
%}
// fast ArraysSupport.vectorizedHashCode
//
// RVV variant of the VectorizedHashCode match rule: selected only when UseRVV
// is set (the scalar 'arrays_hashcode' rule carries the opposite predicate).
// 'ary' and 'cnt' are consumed and destroyed (USE_KILL); 'result' is both the
// initial hash value and the destination. v2..v9 are reserved as temporaries
// because arrays_hashcode_v works on the LMUL=2 register groups starting at
// v2, v4, v6 and v8. tmp1..tmp3 are general-purpose scratch registers handed
// to the macro assembler routine.
instruct varrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
vReg_V6 v6, vReg_V7 v7, vReg_V8 v8, vReg_V9 v9,
iRegLNoSp tmp1, iRegLNoSp tmp2, iRegLNoSp tmp3,
rFlagsReg cr)
%{
predicate(UseRVV);
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
effect(USE_KILL ary, USE_KILL cnt, USE basic_type,
TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
ins_encode %{
__ arrays_hashcode_v($ary$$Register, $cnt$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
(BasicType)$basic_type$$constant);
%}
ins_pipe(pipe_class_memory);
%}
instruct vstring_compareU_128b(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,

View File

@ -73,6 +73,9 @@
do_stub(compiler, string_indexof_linear_ul) \
do_arch_entry(riscv, compiler, string_indexof_linear_ul, \
string_indexof_linear_ul, string_indexof_linear_ul) \
do_stub(compiler, arrays_hashcode_powers_of_31) \
do_arch_entry(riscv, compiler, arrays_hashcode_powers_of_31, \
arrays_hashcode_powers_of_31, arrays_hashcode_powers_of_31) \
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \

View File

@ -6624,6 +6624,24 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}
// Emits a read-only table of jint powers of 31 into the stub code and returns
// its start address. The table has (stride + 1) entries in descending order:
//   31^stride, 31^(stride - 1), ..., 31^1, 31^0
// where stride is the number of jint lanes in an LMUL=2 vector register group
// of MaxVectorSize bytes per register. Powers are computed with
// java_multiply, i.e. with Java int wrap-around semantics.
address generate_arrays_hashcode_powers_of_31() {
  assert(UseRVV, "sanity");
  const int lmul = 2;
  const int stride = MaxVectorSize / sizeof(jint) * lmul;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "arrays_hashcode_powers_of_31");
  address start = __ pc();

  for (int exponent = stride; exponent >= 0; --exponent) {
    // Compute 31^exponent by repeated multiplication.
    jint pow31 = 1;
    int remaining = exponent;
    while (remaining-- > 0) {
      pow31 = java_multiply(pow31, 31);
    }
    __ emit_int32(pow31);
  }

  return start;
}
#endif // COMPILER2
/**
@ -6818,6 +6836,10 @@ static const int64_t right_3_bits = right_n_bits(3);
StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
}
if (UseVectorizedHashCodeIntrinsic && UseRVV) {
StubRoutines::riscv::_arrays_hashcode_powers_of_31 = generate_arrays_hashcode_powers_of_31();
}
if (UseSHA256Intrinsics) {
Sha2Generator sha2(_masm, this);
StubRoutines::_sha256_implCompress = sha2.generate_sha256_implCompress(StubId::stubgen_sha256_implCompress_id);