8339910: RISC-V: crc32 intrinsic with carry-less multiplication
Reviewed-by: rehn, luhenry
parent e0d6398783
commit c517ffba7d
@@ -115,6 +115,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
           "Use Zihintpause instructions")                                   \
   product(bool, UseZtso, false, EXPERIMENTAL, "Assume Ztso memory model")   \
   product(bool, UseZvbb, false, EXPERIMENTAL, "Use Zvbb instructions")      \
+  product(bool, UseZvbc, false, EXPERIMENTAL, "Use Zvbc instructions")      \
   product(bool, UseZvfh, false, DIAGNOSTIC, "Use Zvfh instructions")        \
   product(bool, UseZvkn, false, EXPERIMENTAL,                               \
           "Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt")           \
@@ -1712,6 +1712,359 @@ void MacroAssembler::vector_update_crc32(Register crc, Register buf, Register len,
     addi(buf, buf, N*4);
   }
 }
+
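+// Fold one 16-byte chunk when VLEN is 128 bits: carry-less multiply the
+// accumulator vx by the constant pair in vt (vclmul for the low 64-bit
+// products, vclmulh for the high ones), XOR-reduce the partial products,
+// fold in the next 16 bytes loaded from buf, and leave the result in vx.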
+void MacroAssembler::crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
+    VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+    Register buf, Register tmp, const int STEP) {
+  assert_different_registers(vx, vt, vtmp1, vtmp2, vtmp3, vtmp4);
+  vclmul_vv(vtmp1, vx, vt);
+  vclmulh_vv(vtmp2, vx, vt);
+  vle64_v(vtmp4, buf); addi(buf, buf, STEP);
+  // low parts
+  vredxor_vs(vtmp3, vtmp1, vtmp4);
+  // high parts
+  vslidedown_vi(vx, vtmp4, 1);
+  vredxor_vs(vtmp1, vtmp2, vx);
+  // merge low and high back
+  vslideup_vi(vx, vtmp1, 1);
+  vmv_x_s(tmp, vtmp3);
+  vmv_s_x(vx, tmp);
+}
+
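+// Same fold step, but the chunk to fold in (vy) is already in registers
+// rather than loaded from buf; the _2 variant leaves the result in vx,
+// the _3 variant in vy.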
+void MacroAssembler::crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+    VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+    Register tmp) {
+  assert_different_registers(vx, vy, vt, vtmp1, vtmp2, vtmp3, vtmp4);
+  vclmul_vv(vtmp1, vx, vt);
+  vclmulh_vv(vtmp2, vx, vt);
+  // low parts
+  vredxor_vs(vtmp3, vtmp1, vy);
+  // high parts
+  vslidedown_vi(vtmp4, vy, 1);
+  vredxor_vs(vtmp1, vtmp2, vtmp4);
+  // merge low and high back
+  vslideup_vi(vx, vtmp1, 1);
+  vmv_x_s(tmp, vtmp3);
+  vmv_s_x(vx, tmp);
+}
+
+void MacroAssembler::crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+    VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+    Register tmp) {
+  assert_different_registers(vx, vy, vt, vtmp1, vtmp2, vtmp3, vtmp4);
+  vclmul_vv(vtmp1, vx, vt);
+  vclmulh_vv(vtmp2, vx, vt);
+  // low parts
+  vredxor_vs(vtmp3, vtmp1, vy);
+  // high parts
+  vslidedown_vi(vtmp4, vy, 1);
+  vredxor_vs(vtmp1, vtmp2, vtmp4);
+  // merge low and high back
+  vslideup_vi(vy, vtmp1, 1);
+  vmv_x_s(tmp, vtmp3);
+  vmv_s_x(vy, tmp);
+}
+
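+// Folding loop for VLEN == 128 bits: eight 16-byte accumulators (v0..v7)
+// fold 128 bytes of input per iteration; afterwards they are reduced
+// 128 -> 64 -> 16 bytes with per-stage constants from vclmul_table, and the
+// final 16 bytes are handed to the scalar tail in tmp1/tmp2.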
+void MacroAssembler::kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
+    Register vclmul_table, Register tmp1, Register tmp2) {
+  assert_different_registers(crc, buf, len, vclmul_table, tmp1, tmp2, t1);
+  assert(MaxVectorSize == 16, "sanity");
+
+  const int TABLE_STEP = 16;
+  const int STEP = 16;
+  const int LOOP_STEP = 128;
+  const int N = 2;
+
+  Register loop_step = t1;
+
+  // ======== preparation ========
+
+  mv(loop_step, LOOP_STEP);
+  sub(len, len, loop_step);
+
+  vsetivli(zr, N, Assembler::e64, Assembler::m1, Assembler::mu, Assembler::tu);
+  vle64_v(v0, buf); addi(buf, buf, STEP);
+  vle64_v(v1, buf); addi(buf, buf, STEP);
+  vle64_v(v2, buf); addi(buf, buf, STEP);
+  vle64_v(v3, buf); addi(buf, buf, STEP);
+  vle64_v(v4, buf); addi(buf, buf, STEP);
+  vle64_v(v5, buf); addi(buf, buf, STEP);
+  vle64_v(v6, buf); addi(buf, buf, STEP);
+  vle64_v(v7, buf); addi(buf, buf, STEP);
+
+  vmv_v_x(v31, zr);
+  vsetivli(zr, 1, Assembler::e32, Assembler::m1, Assembler::mu, Assembler::tu);
+  vmv_s_x(v31, crc);
+  vsetivli(zr, N, Assembler::e64, Assembler::m1, Assembler::mu, Assembler::tu);
+  vxor_vv(v0, v0, v31);
+
+  // load table
+  vle64_v(v31, vclmul_table);
+
+  Label L_16_bytes_loop;
+  j(L_16_bytes_loop);
+
+  // ======== folding 128 bytes in data buffer per round ========
+
+  align(OptoLoopAlignment);
+  bind(L_16_bytes_loop);
+  {
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v0, v31, v8, v9, v10, v11, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v1, v31, v12, v13, v14, v15, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v2, v31, v16, v17, v18, v19, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v3, v31, v20, v21, v22, v23, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v4, v31, v24, v25, v26, v27, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v5, v31, v8, v9, v10, v11, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v6, v31, v12, v13, v14, v15, buf, tmp2, STEP);
+    crc32_vclmul_fold_16_bytes_vectorsize_16(v7, v31, v16, v17, v18, v19, buf, tmp2, STEP);
+  }
+  sub(len, len, loop_step);
+  bge(len, loop_step, L_16_bytes_loop);
+
+  // ======== folding into 64 bytes from 128 bytes in register ========
+
+  // load table
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v31, vclmul_table);
+
+  crc32_vclmul_fold_16_bytes_vectorsize_16_2(v0, v4, v31, v8, v9, v10, v11, tmp2);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_2(v1, v5, v31, v12, v13, v14, v15, tmp2);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_2(v2, v6, v31, v16, v17, v18, v19, tmp2);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_2(v3, v7, v31, v20, v21, v22, v23, tmp2);
+
+  // ======== folding into 16 bytes from 64 bytes in register ========
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v31, vclmul_table);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_3(v0, v3, v31, v8, v9, v10, v11, tmp2);
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v31, vclmul_table);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_3(v1, v3, v31, v12, v13, v14, v15, tmp2);
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v31, vclmul_table);
+  crc32_vclmul_fold_16_bytes_vectorsize_16_3(v2, v3, v31, v16, v17, v18, v19, tmp2);
+
+  // ======== final: move result to scalar registers ========
+
+  vmv_x_s(tmp1, v3);
+  vslidedown_vi(v1, v3, 1);
+  vmv_x_s(tmp2, v1);
+}
+
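+// Fold step for the VLEN >= 256 path, used once the data has been narrowed
+// to 16-byte granularity; structurally the same as the vectorsize_16
+// variants above.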
+void MacroAssembler::crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+    VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4) {
+  assert_different_registers(vx, vy, vt, vtmp1, vtmp2, vtmp3, vtmp4);
+  vclmul_vv(vtmp1, vx, vt);
+  vclmulh_vv(vtmp2, vx, vt);
+  // low parts
+  vredxor_vs(vtmp3, vtmp1, vy);
+  // high parts
+  vslidedown_vi(vtmp4, vy, 1);
+  vredxor_vs(vtmp1, vtmp2, vtmp4);
+  // merge low and high back
+  vslideup_vi(vy, vtmp1, 1);
+  vmv_x_s(t1, vtmp3);
+  vmv_s_x(vy, t1);
+}
+
+void MacroAssembler::kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
+    Register vclmul_table, Register tmp1, Register tmp2) {
+  assert_different_registers(crc, buf, len, vclmul_table, tmp1, tmp2, t1);
+  assert(MaxVectorSize >= 32, "sanity");
+
+  // utility: load table
+#define CRC32_VCLMUL_LOAD_TABLE(vt, rt, vtmp, rtmp) \
+  vid_v(vtmp);                                      \
+  mv(rtmp, 2);                                      \
+  vremu_vx(vtmp, vtmp, rtmp);                       \
+  vsll_vi(vtmp, vtmp, 3);                           \
+  vluxei64_v(vt, rt, vtmp);
+
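+  // The indexed load in the macro above replicates one 16-byte constant pair
+  // across the whole vector group: vid/vremu/vsll produce byte offsets
+  // 0, 8, 0, 8, ..., so vluxei64_v gathers the same two 64-bit table words
+  // into every pair of lanes.
+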
+  const int TABLE_STEP = 16;
+  const int STEP = 128; // 128 bytes per round
+  const int N = 2 * 8;  // 2: two 64-bit halves per 128-bit chunk, 8: eight 128-bit chunks
+
+  Register step = tmp2;
+
+  // ======== preparation ========
+
+  mv(step, STEP);
+  sub(len, len, step); // 2 rounds of folding with carry-less multiplication
+
+  vsetivli(zr, N, Assembler::e64, Assembler::m4, Assembler::mu, Assembler::tu);
+  // load data
+  vle64_v(v4, buf);
+  add(buf, buf, step);
+
+  // load table
+  CRC32_VCLMUL_LOAD_TABLE(v8, vclmul_table, v28, t1);
+  // load mask; v28 should already contain: 0, 8, 0, 8, ...
+  vmseq_vi(v2, v28, 0);
+  // now v2 contains: 1, 0, 1, 0, ...
+  vmnand_mm(v1, v2, v2);
+  // now v1 contains: 0, 1, 0, 1, ...
+
+  // initial crc
+  vmv_v_x(v24, zr);
+  vsetivli(zr, 1, Assembler::e32, Assembler::m4, Assembler::mu, Assembler::tu);
+  vmv_s_x(v24, crc);
+  vsetivli(zr, N, Assembler::e64, Assembler::m4, Assembler::mu, Assembler::tu);
+  vxor_vv(v4, v4, v24);
+
+  Label L_128_bytes_loop;
+  j(L_128_bytes_loop);
+
+  // ======== folding 128 bytes in data buffer per round ========
+
+  align(OptoLoopAlignment);
+  bind(L_128_bytes_loop);
+  {
+    // v4: data, reloaded from buf each round (register reused)
+    // v8: table
+    // v12: lows
+    // v16: highs
+    // v20: low_slides
+    // v24: high_slides
+    vclmul_vv(v12, v4, v8);
+    vclmulh_vv(v16, v4, v8);
+    vle64_v(v4, buf);
+    add(buf, buf, step);
+    // lows
+    vslidedown_vi(v20, v12, 1);
+    vmand_mm(v0, v2, v2);
+    vxor_vv(v12, v12, v20, v0_t);
+    // with buf data
+    vxor_vv(v4, v4, v12, v0_t);
+
+    // highs
+    vslideup_vi(v24, v16, 1);
+    vmand_mm(v0, v1, v1);
+    vxor_vv(v16, v16, v24, v0_t);
+    // with buf data
+    vxor_vv(v4, v4, v16, v0_t);
+  }
+  sub(len, len, step);
+  bge(len, step, L_128_bytes_loop);
+
+  // ======== folding into 64 bytes from 128 bytes in register ========
+
+  // load table
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  CRC32_VCLMUL_LOAD_TABLE(v8, vclmul_table, v28, t1);
+
+  // v4: data, first (low) part, N/2 of 64-bits
+  // v20: data, second (high) part, N/2 of 64-bits
+  // v8: table
+  // v10: lows
+  // v12: highs
+  // v14: low_slides
+  // v16: high_slides
+
+  // high part
+  vslidedown_vi(v20, v4, N/2);
+
+  vsetivli(zr, N/2, Assembler::e64, Assembler::m2, Assembler::mu, Assembler::tu);
+
+  vclmul_vv(v10, v4, v8);
+  vclmulh_vv(v12, v4, v8);
+
+  // lows
+  vslidedown_vi(v14, v10, 1);
+  vmand_mm(v0, v2, v2);
+  vxor_vv(v10, v10, v14, v0_t);
+  // with data part 2
+  vxor_vv(v4, v20, v10, v0_t);
+
+  // highs
+  vslideup_vi(v16, v12, 1);
+  vmand_mm(v0, v1, v1);
+  vxor_vv(v12, v12, v16, v0_t);
+  // with data part 2
+  vxor_vv(v4, v20, v12, v0_t);
+
+  // ======== folding into 16 bytes from 64 bytes in register ========
+
+  // v4: data, first part, 2 of 64-bits
+  // v16: data, second part, 2 of 64-bits
+  // v18: data, third part, 2 of 64-bits
+  // v20: data, fourth part, 2 of 64-bits
+  // v8: table
+
+  vslidedown_vi(v16, v4, 2);
+  vslidedown_vi(v18, v4, 4);
+  vslidedown_vi(v20, v4, 6);
+
+  vsetivli(zr, 2, Assembler::e64, Assembler::m1, Assembler::mu, Assembler::tu);
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v8, vclmul_table);
+  crc32_vclmul_fold_to_16_bytes_vectorsize_32(v4, v20, v8, v28, v29, v30, v31);
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v8, vclmul_table);
+  crc32_vclmul_fold_to_16_bytes_vectorsize_32(v16, v20, v8, v28, v29, v30, v31);
+
+  addi(vclmul_table, vclmul_table, TABLE_STEP);
+  vle64_v(v8, vclmul_table);
+  crc32_vclmul_fold_to_16_bytes_vectorsize_32(v18, v20, v8, v28, v29, v30, v31);
+
+  // ======== final: move result to scalar registers ========
+
+  vmv_x_s(tmp1, v20);
+  vslidedown_vi(v4, v20, 1);
+  vmv_x_s(tmp2, v4);
+
+#undef CRC32_VCLMUL_LOAD_TABLE
+}
+
+// For more details of the algorithm, please check the paper:
+//   "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction - Intel"
+//
+// Please also refer to the corresponding code in aarch64 or x86.
+//
+// Since carry-less multiplication on RISC-V differs a bit from the other
+// platforms, the implementation here also differs a bit from theirs.
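+//
+// In short, folding processes one 128-bit chunk at a time: for a chunk
+// c = c_hi:c_lo and the chunk d it is folded into,
+//   fold(c, d) = clmul(c_lo, K_lo) ^ clmul(c_hi, K_hi) ^ d
+// where (K_lo, K_hi) are precomputed remainders of powers of x modulo the
+// CRC-32 polynomial, one pair per fold distance (appended to _crc_table).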
+void MacroAssembler::kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
+    Register table0, Register table1, Register table2, Register table3,
+    Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
+  const int64_t single_table_size = 256;
+  const int64_t table_num = 8; // 4 for scalar, 4 for plain vector
+  const ExternalAddress table_addr = StubRoutines::crc_table_addr();
+  Register vclmul_table = tmp3;
+
+  la(vclmul_table, table_addr);
+  add(vclmul_table, vclmul_table, table_num*single_table_size*sizeof(juint), tmp1);
+  la(table0, table_addr);
+
+  if (MaxVectorSize == 16) {
+    kernel_crc32_vclmul_fold_vectorsize_16(crc, buf, len, vclmul_table, tmp1, tmp2);
+  } else {
+    kernel_crc32_vclmul_fold_vectorsize_32(crc, buf, len, vclmul_table, tmp1, tmp2);
+  }
+
+  mv(crc, zr);
+  update_word_crc32(crc, tmp1, tmp3, tmp4, tmp5, table0, table1, table2, table3, false);
+  update_word_crc32(crc, tmp1, tmp3, tmp4, tmp5, table0, table1, table2, table3, true);
+  update_word_crc32(crc, tmp2, tmp3, tmp4, tmp5, table0, table1, table2, table3, false);
+  update_word_crc32(crc, tmp2, tmp3, tmp4, tmp5, table0, table1, table2, table3, true);
+}
+
 #endif // COMPILER2

 /**
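
The folds above lean on the Zvbc semantics of vclmul.vv and vclmulh.vv, which return, per 64-bit lane, the low and high halves of a 128-bit carry-less product. As a reference model only (a minimal scalar sketch, not HotSpot code; names and test values are illustrative):

#include <cstdint>
#include <cstdio>

// Carry-less 64x64 -> 128-bit multiply over GF(2): XOR of shifted copies
// of a, one per set bit of b, with no carry between bit positions.
static void clmul128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi) {
  uint64_t l = 0, h = 0;
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      l ^= a << i;                     // low 64 bits (what vclmul.vv keeps)
      if (i != 0) {
        h ^= a >> (64 - i);            // high 64 bits (what vclmulh.vv keeps)
      }
    }
  }
  *lo = l;
  *hi = h;
}

int main() {
  uint64_t lo, hi;
  clmul128(0x00000000deadbeefULL, 0x0000000104c11db7ULL, &lo, &hi);
  printf("lo = %016llx, hi = %016llx\n",
         (unsigned long long)lo, (unsigned long long)hi);
  return 0;
}
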
@@ -1765,7 +2118,9 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,

 #ifdef COMPILER2
   if (UseRVV) {
-    const int64_t tmp_limit = MaxVectorSize >= 32 ? unroll_words*3 : unroll_words*5;
+    const int64_t tmp_limit =
+        UseZvbc ? 128 * 3 // 3 rounds of folding with carry-less multiplication
+                : MaxVectorSize >= 32 ? unroll_words*3 : unroll_words*5;
     mv(tmp1, tmp_limit);
     bge(len, tmp1, L_vector_entry);
   }
@@ -1827,7 +2182,13 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
   j(L_exit);

   bind(L_vector_entry);
-  vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);
+  if (UseZvbc) { // carry-less multiplication
+    kernel_crc32_vclmul_fold(crc, buf, len,
+                             table0, table1, table2, table3,
+                             tmp1, tmp2, tmp3, tmp4, tmp6);
+  } else { // plain vector instructions
+    vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);
+  }

   bgtz(len, L_by4_loop_entry);
 }
@@ -1309,6 +1309,24 @@ public:
   void vector_update_crc32(Register crc, Register buf, Register len,
                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                            Register table0, Register table3);
+  void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
+                                Register table0, Register table1, Register table2, Register table3,
+                                Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
+  void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+                                                   VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
+  void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
+                                              Register vclmul_table, Register tmp1, Register tmp2);
+  void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
+                                                VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+                                                Register buf, Register tmp, const int STEP);
+  void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+                                                  VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+                                                  Register tmp);
+  void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
+                                                  VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
+                                                  Register tmp);
+  void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
+                                              Register vclmul_table, Register tmp1, Register tmp2);

   void mul_add(Register out, Register in, Register offset,
                Register len, Register k, Register tmp);
@@ -479,5 +479,17 @@ ATTRIBUTE_ALIGNED(4096) juint StubRoutines::riscv::_crc_table[] =
   0x29413c29, 0x548c7116, 0xd2dba657, 0xaf16eb68, 0x05050e94,
   0x78c843ab, 0xfe9f94ea, 0x8352d9d5, 0x71c95953, 0x0c04146c,
   0x8a53c32d, 0xf79e8e12, 0x5d8d6bee, 0x204026d1, 0xa617f190,
-  0xdbdabcaf
+  0xdbdabcaf,
+
+  // CRC32 table for carry-less multiplication implementation
+  0xe88ef372UL, 0x00000001UL,
+  0x4a7fe880UL, 0x00000001UL,
+  0x54442bd4UL, 0x00000001UL,
+  0xc6e41596UL, 0x00000001UL,
+  0x3db1ecdcUL, 0x00000000UL,
+  0x74359406UL, 0x00000001UL,
+  0xf1da05aaUL, 0x00000000UL,
+  0x5a546366UL, 0x00000001UL,
+  0x751997d0UL, 0x00000001UL,
+  0xccaa009eUL, 0x00000000UL,
 };
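
The ten pairs appended here are the per-stage folding constants, each stored as (low 32 bits, high bits) of a 64-bit value. Constants of this kind are remainders of powers of x modulo the CRC-32 polynomial, in the bit-reflected form a reflected CRC uses. A minimal sketch of the underlying modular arithmetic (illustrative only; the exponent is chosen arbitrarily, and the exact exponents per pair follow the Intel paper's fold distances):

#include <cstdint>
#include <cstdio>

// x^k mod P(x) over GF(2) for the CRC-32 polynomial
// P(x) = x^32 + x^26 + x^23 + ... + 1, kept with the x^32 term explicit.
static uint64_t xpow_mod(uint64_t k) {
  const uint64_t P = 0x104c11db7ULL;
  uint64_t r = 1;                    // the polynomial "1", i.e. x^0
  while (k--) {
    r <<= 1;                         // multiply by x
    if (r & (1ULL << 32)) r ^= P;    // reduce as soon as the degree hits 32
  }
  return r;
}

int main() {
  printf("x^64 mod P = %09llx\n", (unsigned long long)xpow_mod(64));
  return 0;
}
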
@@ -355,6 +355,14 @@ void VM_Version::c2_initialize() {
       warning("Cannot enable UseZvbb on cpu without RVV support.");
     }

+  // UseZvbc (depends on RVV).
+  if (UseZvbc && !UseRVV) {
+    if (!FLAG_IS_DEFAULT(UseZvbc)) {
+      warning("Cannot enable UseZvbc on cpu without RVV support.");
+    }
+    FLAG_SET_DEFAULT(UseZvbc, false);
+  }
+
   // SHA's
   if (FLAG_IS_DEFAULT(UseSHA)) {
     FLAG_SET_DEFAULT(UseSHA, true);
@@ -164,6 +164,7 @@ class VM_Version : public Abstract_VM_Version {
   decl(ext_Zihintpause , "Zihintpause" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZihintpause)) \
   decl(ext_Zacas       , "Zacas"       , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZacas))       \
   decl(ext_Zvbb        , "Zvbb"        , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvbb))        \
+  decl(ext_Zvbc        , "Zvbc"        , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvbc))        \
   decl(ext_Zvfh        , "Zvfh"        , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvfh))        \
   decl(ext_Zvkn        , "Zvkn"        , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvkn))        \
   decl(ext_Zicond      , "Zicond"      , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZicond))      \
@@ -178,6 +178,9 @@ void RiscvHwprobe::add_features_from_query_result() {
   if (is_set(RISCV_HWPROBE_KEY_IMA_EXT_0, RISCV_HWPROBE_EXT_ZFH)) {
     VM_Version::ext_Zfh.enable_feature();
   }
+  if (is_set(RISCV_HWPROBE_KEY_IMA_EXT_0, RISCV_HWPROBE_EXT_ZVBC)) {
+    VM_Version::ext_Zvbc.enable_feature();
+  }
   if (is_set(RISCV_HWPROBE_KEY_IMA_EXT_0, RISCV_HWPROBE_EXT_ZVFH)) {
     VM_Version::ext_Zvfh.enable_feature();
   }