mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8322174: RISC-V: C2 VectorizedHashCode RVV Version
Reviewed-by: fyang, rehn
This commit is contained in:
parent
2e783963d2
commit
134b63f0e8
@ -1687,6 +1687,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
|
||||
Register tmp4, Register tmp5, Register tmp6,
|
||||
BasicType eltype)
|
||||
{
|
||||
assert(!UseRVV, "sanity");
|
||||
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
|
||||
|
||||
const int elsize = arrays_hashcode_elsize(eltype);
|
||||
@ -1759,29 +1760,143 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
|
||||
BLOCK_COMMENT("} // arrays_hashcode");
|
||||
}
|
||||
|
||||
// Emits the RVV (vector) code for the VectorizedHashCode intrinsic: folds 'cnt'
// elements of type 'eltype', starting at address 'ary', into the hash already
// held in 'result', i.e. result = 31 * result + element for every element in order.
//
// Full chunks of 'stride' elements are handled by a vector loop; whatever is
// left (fewer than 'stride' elements) is handled by a scalar loop.
//
//   ary        - address of the next element; advanced as elements are consumed
//   cnt        - number of elements left; decremented by the vector loop
//   result     - in: initial hash value, out: final hash value
//   tmp1..tmp3 - scratch registers; t0 and t1 are used as scratch as well
//   eltype     - element type; T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT or T_INT
//
// The vector register groups starting at v2, v4, v6 and v8 are used with LMUL=2.
void C2_MacroAssembler::arrays_hashcode_v(Register ary, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
BasicType eltype)
|
||||
{
|
||||
assert(UseRVV, "sanity");
|
||||
assert(StubRoutines::riscv::arrays_hashcode_powers_of_31() != nullptr, "sanity");
|
||||
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, t0, t1);
|
||||
|
||||
// The MaxVectorSize should have been set by detecting RVV max vector register
|
||||
// size when checking UseRVV (i.e. MaxVectorSize == VM_Version::_initial_vector_length).
|
||||
// Let's use T_INT as all hashCode calculations eventually deal with ints.
|
||||
// 'stride' is the number of 32-bit lanes in an LMUL=2 register group, i.e. the
// number of elements processed per vector iteration. It must agree with the
// value used when the powers-of-31 table was generated.
const int lmul = 2;
|
||||
const int stride = MaxVectorSize / sizeof(jint) * lmul;
|
||||
|
||||
const int elsize_bytes = arrays_hashcode_elsize(eltype);
|
||||
const int elsize_shift = exact_log2(elsize_bytes);
|
||||
|
||||
// Only labels the generated code with a block comment; no instructions are
// selected here.
switch (eltype) {
|
||||
case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode_v(unsigned byte) {"); break;
|
||||
case T_CHAR: BLOCK_COMMENT("arrays_hashcode_v(char) {"); break;
|
||||
case T_BYTE: BLOCK_COMMENT("arrays_hashcode_v(byte) {"); break;
|
||||
case T_SHORT: BLOCK_COMMENT("arrays_hashcode_v(short) {"); break;
|
||||
case T_INT: BLOCK_COMMENT("arrays_hashcode_v(int) {"); break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Symbolic names for the scratch registers.
const Register pow31_highest = tmp1;
|
||||
const Register ary_end = tmp2;
|
||||
const Register consumed = tmp3;
|
||||
|
||||
// Each of these names the first register of an LMUL=2 group.
const VectorRegister v_sum = v2;
|
||||
const VectorRegister v_src = v4;
|
||||
const VectorRegister v_coeffs = v6;
|
||||
const VectorRegister v_tmp = v8;
|
||||
|
||||
// The table starts with 31^stride, followed by 31^(stride-1) ... 31^0
// (see generate_arrays_hashcode_powers_of_31). adr_pows31 skips the first
// entry so that it points at the per-lane coefficients.
const address adr_pows31 = StubRoutines::riscv::arrays_hashcode_powers_of_31()
|
||||
+ sizeof(jint);
|
||||
Label VEC_LOOP, DONE, SCALAR_TAIL, SCALAR_TAIL_LOOP;
|
||||
|
||||
// NB: at this point (a) 'result' already has some value,
|
||||
// (b) 'cnt' is not 0 or 1, see java code for details.
|
||||
|
||||
// t0 != 0 iff cnt >= stride (assumes stride is a power of two and cnt is
// non-negative). With less than one full chunk, go straight to the scalar loop.
andi(t0, cnt, ~(stride - 1));
|
||||
beqz(t0, SCALAR_TAIL);
|
||||
|
||||
// pow31_highest = 31^stride, the table entry just before adr_pows31.
la(t1, ExternalAddress(adr_pows31));
|
||||
lw(pow31_highest, Address(t1, -1 * sizeof(jint)));
|
||||
|
||||
// NOTE(review): vl is set once here with AVL = cnt and never re-set in the
// loop; 'consumed' is then used as if it were always 'stride'. The RVV spec
// allows an implementation to pick vl < VLMAX when VLMAX < AVL < 2 * VLMAX.
// Confirm this cannot happen on supported hardware, or pass a fixed AVL.
vsetvli(consumed, cnt, Assembler::e32, Assembler::m2);
|
||||
vle32_v(v_coeffs, t1); // 31^^(stride - 1) ... 31^^0
|
||||
// Per-lane partial sums start at zero.
vmv_v_x(v_sum, x0);
|
||||
|
||||
bind(VEC_LOOP);
|
||||
// Load the next chunk, widened to 32-bit lanes, into v_src.
arrays_hashcode_elload_v(v_src, v_tmp, ary, eltype);
|
||||
// v_src[i] = element[i] * 31^(stride - 1 - i)
vmul_vv(v_src, v_src, v_coeffs);
|
||||
// v_sum = v_sum * 31^stride + v_src
vmadd_vx(v_sum, pow31_highest, v_src);
|
||||
// The scalar part of the hash is shifted by one chunk as well: result *= 31^stride.
mulw(result, result, pow31_highest);
|
||||
// ary += consumed << elsize_shift, i.e. step over the elements just processed.
shadd(ary, consumed, ary, t0, elsize_shift);
|
||||
subw(cnt, cnt, consumed);
|
||||
// Keep looping while at least one full chunk remains.
andi(t1, cnt, ~(stride - 1));
|
||||
bnez(t1, VEC_LOOP);
|
||||
|
||||
// Horizontal reduction: v_sum[0] = 0 + sum of all lanes of v_sum, which is
// then added to the scalar part of the hash.
vmv_s_x(v_tmp, x0);
|
||||
vredsum_vs(v_sum, v_sum, v_tmp);
|
||||
vmv_x_s(t0, v_sum);
|
||||
addw(result, result, t0);
|
||||
// The scalar loop below tests its condition at the bottom, so it must not be
// entered with cnt == 0.
beqz(cnt, DONE);
|
||||
|
||||
bind(SCALAR_TAIL);
|
||||
// ary_end = ary + (cnt << elsize_shift): one past the last remaining element.
shadd(ary_end, cnt, ary, t0, elsize_shift);
|
||||
|
||||
bind(SCALAR_TAIL_LOOP);
|
||||
// result = 31 * result + *ary++
arrays_hashcode_elload(t0, Address(ary), eltype);
|
||||
slli(t1, result, 5); // optimize 31 * result
|
||||
subw(result, t1, result); // with result<<5 - result
|
||||
addw(result, result, t0);
|
||||
addi(ary, ary, elsize_bytes);
|
||||
bne(ary, ary_end, SCALAR_TAIL_LOOP);
|
||||
|
||||
bind(DONE);
|
||||
BLOCK_COMMENT("} // arrays_hashcode_v");
|
||||
}
|
||||
|
||||
// Returns the size in bytes of one array element of type 'eltype', as used by
// the arrays_hashcode code generators.
//
// Only T_BOOLEAN, T_BYTE, T_SHORT, T_CHAR and T_INT are accepted; any other
// type hits ShouldNotReachHere(). The trailing 'return -1' only gives that
// path a return value.
int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
  switch (eltype) {
    case T_BOOLEAN: return sizeof(jboolean);
    case T_BYTE:    return sizeof(jbyte);
    case T_SHORT:   return sizeof(jshort);
    case T_CHAR:    return sizeof(jchar);
    case T_INT:     return sizeof(jint);
    default:
      ShouldNotReachHere();
      return -1;
  }
}
|
||||
|
||||
// Emits a scalar load of one array element of type 'eltype' from 'src' into
// 'dst'. The load instruction is chosen by element width and signedness:
// unsigned loads (lbu/lhu) for T_BOOLEAN and T_CHAR, signed loads (lb/lh/lw)
// for T_BYTE, T_SHORT and T_INT.
//
// Any other element type hits ShouldNotReachHere().
void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
  switch (eltype) {
    // T_BOOLEAN used as surrogate for unsigned byte
    case T_BOOLEAN: lbu(dst, src); break;
    case T_BYTE:    lb(dst, src);  break;
    case T_SHORT:   lh(dst, src);  break;
    case T_CHAR:    lhu(dst, src); break;
    case T_INT:     lw(dst, src);  break;
    default:
      ShouldNotReachHere();
  }
}
|
||||
|
||||
// Emits an RVV unit-stride load of array elements of type 'eltype' from the
// address held in 'src', leaving the values in 'vdst'.
//
// Sub-word element types are first loaded at their natural width into 'vtmp'
// and then widened into 'vdst' (vf4 for 8-bit, vf2 for 16-bit elements):
// zero-extended for T_BOOLEAN and T_CHAR, sign-extended for T_BYTE and T_SHORT.
// T_INT elements are loaded directly into 'vdst'; 'vtmp' is not used then.
//
// The vector configuration (vl, SEW, LMUL) is whatever the caller has set up;
// arrays_hashcode_v calls this with e32/m2. Any other element type hits
// ShouldNotReachHere().
void C2_MacroAssembler::arrays_hashcode_elload_v(VectorRegister vdst,
                                                 VectorRegister vtmp,
                                                 Register src,
                                                 BasicType eltype) {
  assert_different_registers(vdst, vtmp);

  switch (eltype) {
    // 8-bit elements: load bytes, then widen by a factor of 4.
    case T_BOOLEAN:
    case T_BYTE:
      vle8_v(vtmp, src);
      if (eltype == T_BYTE) {
        vsext_vf4(vdst, vtmp);
      } else {
        vzext_vf4(vdst, vtmp);
      }
      break;

    // 16-bit elements: load halfwords, then widen by a factor of 2.
    case T_CHAR:
    case T_SHORT:
      vle16_v(vtmp, src);
      if (eltype == T_SHORT) {
        vsext_vf2(vdst, vtmp);
      } else {
        vzext_vf2(vdst, vtmp);
      }
      break;

    // 32-bit elements need no widening.
    case T_INT:
      vle32_v(vdst, src);
      break;

    default:
      ShouldNotReachHere();
  }
}
|
||||
|
||||
|
||||
@ -92,11 +92,15 @@
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
BasicType eltype);
|
||||
|
||||
// helper functions for arrays_hashcode
|
||||
// Size in bytes of one array element of the given type.
int arrays_hashcode_elsize(BasicType eltype);
|
||||
// Emits a scalar load of one element of the given type from 'src' into 'dst'.
void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
|
||||
|
||||
// RVV variant of arrays_hashcode; asserts UseRVV. 'ary' and 'cnt' are modified,
// 'result' is both the initial and the final hash value.
void arrays_hashcode_v(Register ary, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
BasicType eltype);
|
||||
// Emits a vector load of elements of the given type from the address in 'src'
// into 'vdst'; sub-word types go through 'vtmp' and are widened.
void arrays_hashcode_elload_v(VectorRegister vdst, VectorRegister vtmp,
|
||||
Register src, BasicType eltype);
|
||||
|
||||
void string_equals(Register r1, Register r2,
|
||||
Register result, Register cnt1);
|
||||
|
||||
|
||||
@ -10991,6 +10991,7 @@ instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI ba
|
||||
iRegLNoSp tmp3, iRegLNoSp tmp4,
|
||||
iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!UseRVV);
|
||||
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
|
||||
USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
|
||||
|
||||
@ -4080,6 +4080,28 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// fast ArraysSupport.vectorizedHashCode
|
||||
// RVV matcher rule for the VectorizedHashCode node. It is selected when UseRVV
// is set; the scalar 'arrays_hashcode' rule carries predicate(!UseRVV), so
// exactly one of the two rules applies.
//
// The operand classes are the ones named for R11 (ary), R12 (cnt) and R10
// (result). v2..v9 are reserved as temporaries because arrays_hashcode_v uses
// the register groups starting at v2, v4, v6 and v8 with LMUL=2.
instruct varrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
|
||||
vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
|
||||
vReg_V6 v6, vReg_V7 v7, vReg_V8 v8, vReg_V9 v9,
|
||||
iRegLNoSp tmp1, iRegLNoSp tmp2, iRegLNoSp tmp3,
|
||||
rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseRVV);
|
||||
// 'result' appears on both sides: it is the initial hash on input and the
// final hash on output.
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
|
||||
// ary and cnt are USE_KILL because the emitted code advances ary and
// decrements cnt.
effect(USE_KILL ary, USE_KILL cnt, USE basic_type,
|
||||
TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
||||
|
||||
format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
|
||||
// basic_type is an immediate; it is passed to the macro assembler as the
// BasicType of the array elements.
ins_encode %{
|
||||
__ arrays_hashcode_v($ary$$Register, $cnt$$Register, $result$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
(BasicType)$basic_type$$constant);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct vstring_compareU_128b(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
||||
iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
|
||||
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
|
||||
|
||||
@ -73,6 +73,9 @@
|
||||
do_stub(compiler, string_indexof_linear_ul) \
|
||||
do_arch_entry(riscv, compiler, string_indexof_linear_ul, \
|
||||
string_indexof_linear_ul, string_indexof_linear_ul) \
|
||||
do_stub(compiler, arrays_hashcode_powers_of_31) \
|
||||
do_arch_entry(riscv, compiler, arrays_hashcode_powers_of_31, \
|
||||
arrays_hashcode_powers_of_31, arrays_hashcode_powers_of_31) \
|
||||
|
||||
|
||||
#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
|
||||
|
||||
@ -6624,6 +6624,24 @@ static const int64_t right_3_bits = right_n_bits(3);
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits the table of powers of 31 used by the RVV arrays_hashcode code and
// returns the address of its first entry.
//
// Layout, as jint entries in emission order:
//   31^stride, 31^(stride - 1), ..., 31^1, 31^0        (stride + 1 entries)
// where stride = MaxVectorSize / sizeof(jint) * lmul. lmul and stride must stay
// in sync with C2_MacroAssembler::arrays_hashcode_v, which reads entry 0 as a
// scalar multiplier and loads the entries after it as per-lane coefficients.
//
// Powers are computed with java_multiply (presumably Java int, i.e. wrapping,
// multiplication).
//
// NOTE(review): other stubs in this file are identified via StubId while this
// one uses a string-named StubCodeMark - confirm that is intended.
address generate_arrays_hashcode_powers_of_31() {
  assert(UseRVV, "sanity");
  const int lmul = 2;
  const int stride = MaxVectorSize / sizeof(jint) * lmul;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "arrays_hashcode_powers_of_31");
  address start = __ pc();

  // Highest exponent first, down to 31^0.
  int exponent = stride;
  while (exponent >= 0) {
    jint value = 1;
    for (int n = 0; n < exponent; n++) {
      value = java_multiply(value, 31);
    }
    __ emit_int32(value);
    exponent--;
  }

  return start;
}
|
||||
|
||||
#endif // COMPILER2
|
||||
|
||||
/**
|
||||
@ -6818,6 +6836,10 @@ static const int64_t right_3_bits = right_n_bits(3);
|
||||
StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
|
||||
}
|
||||
|
||||
if (UseVectorizedHashCodeIntrinsic && UseRVV) {
|
||||
StubRoutines::riscv::_arrays_hashcode_powers_of_31 = generate_arrays_hashcode_powers_of_31();
|
||||
}
|
||||
|
||||
if (UseSHA256Intrinsics) {
|
||||
Sha2Generator sha2(_masm, this);
|
||||
StubRoutines::_sha256_implCompress = sha2.generate_sha256_implCompress(StubId::stubgen_sha256_implCompress_id);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user