mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-16 10:53:31 +00:00
8318217: RISC-V: C2 VectorizedHashCode
Reviewed-by: mli, fyang
This commit is contained in:
parent
ce4b257fa5
commit
6359b4ec23
@ -1459,6 +1459,112 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
|
||||
BLOCK_COMMENT("} string_equals");
|
||||
}
|
||||
|
||||
// jdk.internal.util.ArraysSupport.vectorizedHashCode
|
||||
void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
Register tmp4, Register tmp5, Register tmp6,
|
||||
BasicType eltype)
|
||||
{
|
||||
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
|
||||
|
||||
const int elsize = arrays_hashcode_elsize(eltype);
|
||||
const int chunks_end_shift = exact_log2(elsize);
|
||||
|
||||
switch (eltype) {
|
||||
case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode(unsigned byte) {"); break;
|
||||
case T_CHAR: BLOCK_COMMENT("arrays_hashcode(char) {"); break;
|
||||
case T_BYTE: BLOCK_COMMENT("arrays_hashcode(byte) {"); break;
|
||||
case T_SHORT: BLOCK_COMMENT("arrays_hashcode(short) {"); break;
|
||||
case T_INT: BLOCK_COMMENT("arrays_hashcode(int) {"); break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
const int stride = 4;
|
||||
const Register pow31_4 = tmp1;
|
||||
const Register pow31_3 = tmp2;
|
||||
const Register pow31_2 = tmp3;
|
||||
const Register chunks = tmp4;
|
||||
const Register chunks_end = chunks;
|
||||
|
||||
Label DONE, TAIL, TAIL_LOOP, WIDE_LOOP;
|
||||
|
||||
// result has a value initially
|
||||
|
||||
beqz(cnt, DONE);
|
||||
|
||||
andi(chunks, cnt, ~(stride-1));
|
||||
beqz(chunks, TAIL);
|
||||
|
||||
mv(pow31_4, 923521); // [31^^4]
|
||||
mv(pow31_3, 29791); // [31^^3]
|
||||
mv(pow31_2, 961); // [31^^2]
|
||||
|
||||
slli(chunks_end, chunks, chunks_end_shift);
|
||||
add(chunks_end, ary, chunks_end);
|
||||
andi(cnt, cnt, stride-1); // don't forget about tail!
|
||||
|
||||
bind(WIDE_LOOP);
|
||||
mulw(result, result, pow31_4); // 31^^4 * h
|
||||
arrays_hashcode_elload(t0, Address(ary, 0 * elsize), eltype);
|
||||
arrays_hashcode_elload(t1, Address(ary, 1 * elsize), eltype);
|
||||
arrays_hashcode_elload(tmp5, Address(ary, 2 * elsize), eltype);
|
||||
arrays_hashcode_elload(tmp6, Address(ary, 3 * elsize), eltype);
|
||||
mulw(t0, t0, pow31_3); // 31^^3 * ary[i+0]
|
||||
addw(result, result, t0);
|
||||
mulw(t1, t1, pow31_2); // 31^^2 * ary[i+1]
|
||||
addw(result, result, t1);
|
||||
slli(t0, tmp5, 5); // optimize 31^^1 * ary[i+2]
|
||||
subw(tmp5, t0, tmp5); // with ary[i+2]<<5 - ary[i+2]
|
||||
addw(result, result, tmp5);
|
||||
addw(result, result, tmp6); // 31^^4 * h + 31^^3 * ary[i+0] + 31^^2 * ary[i+1]
|
||||
// + 31^^1 * ary[i+2] + 31^^0 * ary[i+3]
|
||||
addi(ary, ary, elsize * stride);
|
||||
bne(ary, chunks_end, WIDE_LOOP);
|
||||
beqz(cnt, DONE);
|
||||
|
||||
bind(TAIL);
|
||||
slli(chunks_end, cnt, chunks_end_shift);
|
||||
add(chunks_end, ary, chunks_end);
|
||||
|
||||
bind(TAIL_LOOP);
|
||||
arrays_hashcode_elload(t0, Address(ary), eltype);
|
||||
slli(t1, result, 5); // optimize 31 * result
|
||||
subw(result, t1, result); // with result<<5 - result
|
||||
addw(result, result, t0);
|
||||
addi(ary, ary, elsize);
|
||||
bne(ary, chunks_end, TAIL_LOOP);
|
||||
|
||||
bind(DONE);
|
||||
BLOCK_COMMENT("} // arrays_hashcode");
|
||||
}
|
||||
|
||||
// Returns the size in bytes of one array element of type `eltype`.
// Only T_BOOLEAN, T_BYTE, T_SHORT, T_CHAR and T_INT are handled; any other
// type reaches ShouldNotReachHere(), with -1 as the value returned after it.
int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
  int size_in_bytes = -1;

  switch (eltype) {
  case T_BOOLEAN:
    size_in_bytes = sizeof(jboolean);
    break;
  case T_BYTE:
    size_in_bytes = sizeof(jbyte);
    break;
  case T_SHORT:
    size_in_bytes = sizeof(jshort);
    break;
  case T_CHAR:
    size_in_bytes = sizeof(jchar);
    break;
  case T_INT:
    size_in_bytes = sizeof(jint);
    break;
  default:
    ShouldNotReachHere();
  }

  return size_in_bytes;
}
|
||||
|
||||
// Emits a single load of one array element of type `eltype` from `src` into
// `dst`, picking the load instruction by element type:
//   T_BOOLEAN -> lbu (T_BOOLEAN stands in for "unsigned byte")
//   T_BYTE    -> lb
//   T_SHORT   -> lh
//   T_CHAR    -> lhu
//   T_INT     -> lw
// Any other type reaches ShouldNotReachHere().
void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
  if (eltype == T_BOOLEAN) {
    lbu(dst, src);
  } else if (eltype == T_BYTE) {
    lb(dst, src);
  } else if (eltype == T_SHORT) {
    lh(dst, src);
  } else if (eltype == T_CHAR) {
    lhu(dst, src);
  } else if (eltype == T_INT) {
    lw(dst, src);
  } else {
    ShouldNotReachHere();
  }
}
|
||||
|
||||
typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
|
||||
typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
|
||||
bool is_far, bool is_unordered);
|
||||
|
||||
@ -82,6 +82,15 @@
|
||||
Register result, Register cnt1,
|
||||
int elem_size);
|
||||
|
||||
// Emits the hash-code loop for an array: folds cnt elements of type eltype,
// read starting at the address in ary, into result using
// result = 31 * result + element. result carries the initial hash value on
// entry. ary and cnt may be overwritten; tmp1..tmp6 are scratch registers.
void arrays_hashcode(Register ary, Register cnt, Register result,
|
||||
Register tmp1, Register tmp2,
|
||||
Register tmp3, Register tmp4,
|
||||
Register tmp5, Register tmp6,
|
||||
BasicType eltype);
|
||||
// helper functions for arrays_hashcode
|
||||
// Size in bytes of one element of type eltype
// (T_BOOLEAN, T_BYTE, T_SHORT, T_CHAR or T_INT).
int arrays_hashcode_elsize(BasicType eltype);
|
||||
// Emits one load of an element of type eltype from src into dst.
void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
|
||||
|
||||
void string_equals(Register r1, Register r2,
|
||||
Register result, Register cnt1,
|
||||
int elem_size);
|
||||
|
||||
@ -10371,6 +10371,26 @@ instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// fast ArraysSupport.vectorizedHashCode
//
// Matcher rule for the VectorizedHashCode node. The node's inputs are paired
// as (ary, cnt) and (result, basic_type); its value is produced in result,
// which is also the incoming initial hash. The encoding delegates to
// C2_MacroAssembler::arrays_hashcode.
// NOTE(review): the operand class names (iRegP_R11, iRegI_R12, iRegI_R10)
// suggest fixed registers; their definitions are outside this excerpt.
|
||||
instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
|
||||
iRegLNoSp tmp1, iRegLNoSp tmp2,
|
||||
iRegLNoSp tmp3, iRegLNoSp tmp4,
|
||||
iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
|
||||
// tmp1..tmp6 are scratch for the macro assembler routine. ary and cnt are
// inputs that the routine overwrites (it advances ary and masks cnt), hence
// USE_KILL. basic_type is only read. cr is declared killed.
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
|
||||
USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
|
||||
|
||||
format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
|
||||
// basic_type is an immediate; it is cast to BasicType to select the element
// size and load instruction when the code is emitted.
ins_encode %{
|
||||
__ arrays_hashcode($ary$$Register, $cnt$$Register, $result$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
$tmp4$$Register, $tmp5$$Register, $tmp6$$Register,
|
||||
(BasicType)$basic_type$$constant);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// Safepoint Instructions
|
||||
|
||||
|
||||
@ -315,6 +315,10 @@ void VM_Version::c2_initialize() {
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
|
||||
FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
|
||||
}
|
||||
|
||||
if (!UseZicbop) {
|
||||
if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
|
||||
warning("Zicbop is not available on this CPU");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user