mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-25 07:09:55 +00:00
8345110: RISC-V: Optimize and clean up byte reverse assembler routines
Reviewed-by: mli, rehn
This commit is contained in:
parent
959fa4a1a3
commit
08d563ba15
@ -2461,41 +2461,6 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// reverse bytes in halfword in lower 16 bits and sign-extend
|
||||
// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
|
||||
// tmp is a scratch register that is only written on the non-Zbb path; it must
// differ from both Rs and Rd (asserted below).
void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
|
||||
if (UseZbb) {
|
||||
// After rev8 the swapped halfword sits in Rd[63:48]; the arithmetic right
// shift by 48 moves it down to Rd[15:0] and sign-extends it.
rev8(Rd, Rs);
|
||||
srai(Rd, Rd, 48);
|
||||
return;
|
||||
}
|
||||
assert_different_registers(Rs, tmp);
|
||||
assert_different_registers(Rd, tmp);
|
||||
// tmp = Rs[15:8], placed in bits 7:0 (becomes the low byte of the result)
srli(tmp, Rs, 8);
|
||||
andi(tmp, tmp, 0xFF);
|
||||
// Move Rs[7:0] to the top byte, then shift it arithmetically down to bits
// 15:8 so that everything above bit 15 is a copy of its sign bit.
slli(Rd, Rs, 56);
|
||||
srai(Rd, Rd, 48); // sign-extend
|
||||
// Merge the low byte into the sign-extended high byte.
orr(Rd, Rd, tmp);
|
||||
}
|
||||
|
||||
// reverse bytes in lower word and sign-extend
|
||||
// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
|
||||
// tmp1 and tmp2 are scratch registers used only on the non-Zbb path; they must
// differ from each other and from both Rs and Rd (asserted below).
void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
||||
if (UseZbb) {
|
||||
// After rev8 the reversed lower word sits in Rd[63:32]; the arithmetic
// right shift by 32 moves it down and sign-extends it.
rev8(Rd, Rs);
|
||||
srai(Rd, Rd, 32);
|
||||
return;
|
||||
}
|
||||
assert_different_registers(Rs, tmp1, tmp2);
|
||||
assert_different_registers(Rd, tmp1, tmp2);
|
||||
// First swap the bytes inside each of the two lower halfwords; what remains
// is to exchange the two halfwords themselves.
revb_h_w_u(Rd, Rs, tmp1, tmp2);
|
||||
// tmp2 = low halfword of Rd moved to bits 31:16, with bits above 31 filled
// from its top bit.
slli(tmp2, Rd, 48);
|
||||
srai(tmp2, tmp2, 32); // sign-extend
|
||||
// Rd = former bits 31:16 of Rd, now in bits 15:0.
srli(Rd, Rd, 16);
|
||||
// Combine the two exchanged halfwords.
orr(Rd, Rd, tmp2);
|
||||
}
|
||||
|
||||
// reverse bytes in halfword in lower 16 bits and zero-extend
|
||||
// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
|
||||
void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
|
||||
@ -2532,56 +2497,28 @@ void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Registe
|
||||
orr(Rd, Rd, tmp2);
|
||||
}
|
||||
|
||||
// This method is only used for revb_h
|
||||
// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
|
||||
// In other words: shift Rs left by 16 bits and fill bits 15:0 with the
// byte-swapped former top halfword Rs[63:48].
// tmp1 and tmp2 are scratch registers; Rs, tmp1 and tmp2 must be pairwise
// different and Rd must differ from tmp1 (asserted below).
void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
||||
assert_different_registers(Rs, tmp1, tmp2);
|
||||
assert_different_registers(Rd, tmp1);
|
||||
// tmp1 = Rs[63:48] in bits 15:0
srli(tmp1, Rs, 48);
|
||||
// tmp2 = Rs[55:48] moved up to bits 15:8
andi(tmp2, tmp1, 0xFF);
|
||||
slli(tmp2, tmp2, 8);
|
||||
// tmp1 = Rs[63:56] in bits 7:0, then merged with tmp2 to form the swapped
// halfword Rs[55:48] Rs[63:56]
srli(tmp1, tmp1, 8);
|
||||
orr(tmp1, tmp1, tmp2);
|
||||
// Rd = Rs[47:0] in bits 63:16, with the swapped halfword in bits 15:0
slli(Rd, Rs, 16);
|
||||
orr(Rd, Rd, tmp1);
|
||||
}
|
||||
|
||||
// reverse bytes in each halfword
|
||||
// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
|
||||
// The Zbb path needs only tmp1 as scratch; the fallback path needs tmp1 and
// tmp2. The scratch registers must differ from Rs and Rd (asserted per path).
void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
||||
if (UseZbb) {
|
||||
assert_different_registers(Rs, tmp1);
|
||||
assert_different_registers(Rd, tmp1);
|
||||
// rev8 reverses all eight bytes, which also reverses the order of the four
// halfwords; the remaining steps put the halfwords back in place.
rev8(Rd, Rs);
|
||||
// tmp1 = lower word of Rd with its two halfwords exchanged (rotate by 16),
// moved into the upper word of the result.
zero_extend(tmp1, Rd, 32);
|
||||
roriw(tmp1, tmp1, 16);
|
||||
slli(tmp1, tmp1, 32);
|
||||
// Rd = upper word of Rd moved down, its two halfwords exchanged, and the
// upper 32 bits cleared so the final OR does not disturb tmp1's word.
srli(Rd, Rd, 32);
|
||||
roriw(Rd, Rd, 16);
|
||||
zero_extend(Rd, Rd, 32);
|
||||
orr(Rd, Rd, tmp1);
|
||||
return;
|
||||
}
|
||||
assert_different_registers(Rs, tmp1, tmp2);
|
||||
assert_different_registers(Rd, tmp1, tmp2);
|
||||
// Each helper call shifts the value left by 16 bits and re-inserts the former
// top halfword, byte-swapped, at the bottom. Four calls in total (one from Rs,
// three in place on Rd) process every halfword and bring each back to its
// original position.
revb_h_helper(Rd, Rs, tmp1, tmp2);
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
revb_h_helper(Rd, Rd, tmp1, tmp2);
|
||||
}
|
||||
}
|
||||
|
||||
// reverse bytes in each word
|
||||
// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
|
||||
// reverse bytes in lower word, sign-extend
|
||||
// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
|
||||
// NOTE(review): this listing appears to show both sides of a diff hunk with the
// +/- markers lost — two different header comments precede this function, and
// the body contains both a rotate-based sequence and a mask-and-shift sequence.
// Confirm the exact instruction sequence against the repository before relying
// on it.
void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
||||
if (UseZbb) {
|
||||
rev8(Rd, Rs);
|
||||
rori(Rd, Rd, 32);
|
||||
srai(Rd, Rd, 32);
|
||||
return;
|
||||
}
|
||||
assert_different_registers(Rs, tmp1, tmp2);
|
||||
assert_different_registers(Rd, tmp1, tmp2);
|
||||
revb(Rd, Rs, tmp1, tmp2);
|
||||
ror_imm(Rd, Rd, 32);
|
||||
// Mask-and-shift sequence: tmp1 accumulates the bytes of the lower word of Rs
// starting with Rs[7:0], shifting left by 8 after each byte is added.
andi(tmp1, Rs, 0xFF);
|
||||
slli(tmp1, tmp1, 8);
|
||||
// step = 8, 16: append Rs[15:8] and then Rs[23:16] to the accumulator.
for (int step = 8; step < 24; step += 8) {
|
||||
srli(tmp2, Rs, step);
|
||||
andi(tmp2, tmp2, 0xFF);
|
||||
orr(tmp1, tmp1, tmp2);
|
||||
slli(tmp1, tmp1, 8);
|
||||
}
|
||||
// Rs[31:24] becomes the lowest byte of the result.
srli(Rd, Rs, 24);
|
||||
andi(Rd, Rd, 0xFF);
|
||||
orr(Rd, tmp1, Rd);
|
||||
// Sign-extend the assembled 32-bit value (per the header comment's contract).
sign_extend(Rd, Rd, 32);
|
||||
}
|
||||
|
||||
// reverse bytes in doubleword
|
||||
|
||||
@ -913,13 +913,9 @@ public:
|
||||
void orn(Register Rd, Register Rs1, Register Rs2);
|
||||
|
||||
// revb
|
||||
void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
|
||||
void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
|
||||
void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
|
||||
void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
|
||||
void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
|
||||
void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
|
||||
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
|
||||
void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
|
||||
void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
|
||||
|
||||
void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
|
||||
|
||||
@ -181,11 +181,15 @@ instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
|
||||
match(Set dst (ReverseBytesI src));
|
||||
|
||||
ins_cost(ALU_COST * 2);
|
||||
format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %}
|
||||
format %{
|
||||
"rev8 $dst, $src\t#@bytes_reverse_int_b\t\n"
|
||||
"srai $dst, $dst, 32\t\n"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
assert(UseZbb, "must be");
|
||||
__ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 32);
|
||||
%}
|
||||
|
||||
ins_pipe(ialu_reg);
|
||||
@ -209,11 +213,15 @@ instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
|
||||
match(Set dst (ReverseBytesUS src));
|
||||
|
||||
ins_cost(ALU_COST * 2);
|
||||
format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
|
||||
format %{
|
||||
"rev8 $dst, $src\t#@bytes_reverse_unsigned_short_b\t\n"
|
||||
"srli $dst, $dst, 48\t\n"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
assert(UseZbb, "must be");
|
||||
__ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ srli(as_Register($dst$$reg), as_Register($dst$$reg), 48);
|
||||
%}
|
||||
|
||||
ins_pipe(ialu_reg);
|
||||
@ -223,11 +231,15 @@ instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
|
||||
match(Set dst (ReverseBytesS src));
|
||||
|
||||
ins_cost(ALU_COST * 2);
|
||||
format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %}
|
||||
format %{
|
||||
"rev8 $dst, $src\t#@bytes_reverse_short_b\t\n"
|
||||
"srai $dst, $dst, 48\t\n"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
assert(UseZbb, "must be");
|
||||
__ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ rev8(as_Register($dst$$reg), as_Register($src$$reg));
|
||||
__ srai(as_Register($dst$$reg), as_Register($dst$$reg), 48);
|
||||
%}
|
||||
|
||||
ins_pipe(ialu_reg);
|
||||
|
||||
@ -1621,13 +1621,14 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
|
||||
|
||||
// load branch displacement
|
||||
if (!is_wide) {
|
||||
// Convert the 16-bit value into native byte-ordering and sign-extend
|
||||
__ lb(x12, at_bcp(1));
|
||||
__ lbu(t1, at_bcp(2));
|
||||
__ slli(x12, x12, 8);
|
||||
__ add(x12, x12, t1);
|
||||
} else {
|
||||
__ lwu(x12, at_bcp(1));
|
||||
__ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
|
||||
__ revb_w(x12, x12);
|
||||
}
|
||||
|
||||
// Handle all the JSR stuff here, then exit.
|
||||
@ -1892,8 +1893,8 @@ void TemplateTable::tableswitch() {
|
||||
// load lo & hi
|
||||
__ lwu(x12, Address(x11, BytesPerInt));
|
||||
__ lwu(x13, Address(x11, 2 * BytesPerInt));
|
||||
__ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(x12, x12);
|
||||
__ revb_w(x13, x13);
|
||||
// check against lo & hi
|
||||
__ blt(x10, x12, default_case);
|
||||
__ bgt(x10, x13, default_case);
|
||||
@ -1904,7 +1905,7 @@ void TemplateTable::tableswitch() {
|
||||
__ profile_switch_case(x10, x11, x12);
|
||||
// continue execution
|
||||
__ bind(continue_execution);
|
||||
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(x13, x13);
|
||||
__ add(xbcp, xbcp, x13);
|
||||
__ load_unsigned_byte(t0, Address(xbcp));
|
||||
__ dispatch_only(vtos, /*generate_poll*/true);
|
||||
@ -1924,7 +1925,7 @@ void TemplateTable::fast_linearswitch() {
|
||||
transition(itos, vtos);
|
||||
Label loop_entry, loop, found, continue_execution;
|
||||
// bswap x10 so we can avoid bswapping the table entries
|
||||
__ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(x10, x10);
|
||||
// align xbcp
|
||||
__ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
|
||||
// this instruction (change offsets
|
||||
@ -1932,6 +1933,9 @@ void TemplateTable::fast_linearswitch() {
|
||||
__ andi(x9, x9, -BytesPerInt);
|
||||
// set counter
|
||||
__ lwu(x11, Address(x9, BytesPerInt));
|
||||
// Convert the 32-bit npairs (number of pairs) into native byte-ordering
|
||||
// We can use sign-extension here because npairs must be greater than or
|
||||
// equal to 0 per JVM spec on 'lookupswitch' bytecode.
|
||||
__ revb_w(x11, x11);
|
||||
__ j(loop_entry);
|
||||
// table search
|
||||
@ -1953,7 +1957,7 @@ void TemplateTable::fast_linearswitch() {
|
||||
__ profile_switch_case(x11, x10, x9);
|
||||
// continue execution
|
||||
__ bind(continue_execution);
|
||||
__ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(x13, x13);
|
||||
__ add(xbcp, xbcp, x13);
|
||||
__ lbu(t0, Address(xbcp, 0));
|
||||
__ dispatch_only(vtos, /*generate_poll*/true);
|
||||
@ -2005,7 +2009,9 @@ void TemplateTable::fast_binaryswitch() {
|
||||
__ mv(i, zr); // i = 0
|
||||
__ lwu(j, Address(array, -BytesPerInt)); // j = length(array)
|
||||
|
||||
// Convert j into native byteordering
|
||||
// Convert the 32-bit npairs (number of pairs) into native byte-ordering
|
||||
// We can use sign-extension here because npairs must be greater than or
|
||||
// equal to 0 per JVM spec on 'lookupswitch' bytecode.
|
||||
__ revb_w(j, j);
|
||||
|
||||
// And start
|
||||
@ -2024,7 +2030,7 @@ void TemplateTable::fast_binaryswitch() {
|
||||
// Convert array[h].match to native byte-ordering before compare
|
||||
__ shadd(temp, h, array, temp, 3);
|
||||
__ lwu(temp, Address(temp, 0));
|
||||
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(temp, temp);
|
||||
|
||||
Label L_done, L_greater;
|
||||
__ bge(key, temp, L_greater);
|
||||
@ -2047,14 +2053,14 @@ void TemplateTable::fast_binaryswitch() {
|
||||
// Convert array[i].match to native byte-ordering before compare
|
||||
__ shadd(temp, i, array, temp, 3);
|
||||
__ lwu(temp, Address(temp, 0));
|
||||
__ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(temp, temp);
|
||||
__ bne(key, temp, default_case);
|
||||
|
||||
// entry found -> j = offset
|
||||
__ shadd(temp, i, array, temp, 3);
|
||||
__ lwu(j, Address(temp, BytesPerInt));
|
||||
__ profile_switch_case(i, key, array);
|
||||
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(j, j);
|
||||
|
||||
__ add(temp, xbcp, j);
|
||||
__ load_unsigned_byte(t0, Address(temp, 0));
|
||||
@ -2067,7 +2073,7 @@ void TemplateTable::fast_binaryswitch() {
|
||||
__ bind(default_case);
|
||||
__ profile_switch_default(i);
|
||||
__ lwu(j, Address(array, -2 * BytesPerInt));
|
||||
__ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
|
||||
__ revb_w(j, j);
|
||||
|
||||
__ add(temp, xbcp, j);
|
||||
__ load_unsigned_byte(t0, Address(temp, 0));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user