mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-03 10:20:57 +00:00
8381650: Vector rotate operations on AArch64 with NEON
Reviewed-by: aph, haosun
This commit is contained in:
parent
ca405d0eb2
commit
c8bb46be72
@ -311,6 +311,12 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_RotateLeftV:
|
||||
case Op_RotateRightV:
|
||||
if (length_in_bytes > 16) {
|
||||
return false; // NEON only, since SLI/USHR are not available in SVE
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -370,6 +376,11 @@ source %{
|
||||
case Op_SqrtVHF:
|
||||
case Op_FmaVHF:
|
||||
return false;
|
||||
// There's no SLI instruction in SVE, so we can't have an optimal vector
|
||||
// rotate with masking when emitting code for SVE.
|
||||
case Op_RotateLeftV:
|
||||
case Op_RotateRightV:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -3229,6 +3240,25 @@ instruct vlsra_imm(vReg dst, vReg src, immI_positive shift) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector rotate with constant shift count (NEON only)
|
||||
// Uses USHR+SLI 2-instruction sequence instead of SHL+USHR+ORR 3-instruction decomposition.
|
||||
|
||||
instruct vrotateconstant(vReg dst, vReg src, immI shift) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) <= 16);
|
||||
match(Set dst (RotateLeftV src shift));
|
||||
match(Set dst (RotateRightV src shift));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vrotateconstant $dst, $src, $shift" %}
|
||||
ins_encode %{
|
||||
int opc = this->ideal_Opcode();
|
||||
int raw_shift = checked_cast<int>(opc == Op_RotateLeftV ?
|
||||
$shift$$constant : -$shift$$constant);
|
||||
__ neon_vector_rotate($dst$$FloatRegister, get_arrangement(this),
|
||||
$src$$FloatRegister, raw_shift);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector shift - predicated
|
||||
|
||||
instruct vlsl_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
|
||||
|
||||
@ -301,6 +301,12 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_RotateLeftV:
|
||||
case Op_RotateRightV:
|
||||
if (length_in_bytes > 16) {
|
||||
return false; // NEON only, since SLI/USHR are not available in SVE
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -360,6 +366,11 @@ source %{
|
||||
case Op_SqrtVHF:
|
||||
case Op_FmaVHF:
|
||||
return false;
|
||||
// There's no SLI instruction in SVE, so we can't have an optimal vector
|
||||
// rotate with masking when emitting code for SVE.
|
||||
case Op_RotateLeftV:
|
||||
case Op_RotateRightV:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1965,6 +1976,25 @@ instruct vlsra_imm(vReg dst, vReg src, immI_positive shift) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector rotate with constant shift count (NEON only)
|
||||
// Uses USHR+SLI 2-instruction sequence instead of SHL+USHR+ORR 3-instruction decomposition.
|
||||
|
||||
instruct vrotateconstant(vReg dst, vReg src, immI shift) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) <= 16);
|
||||
match(Set dst (RotateLeftV src shift));
|
||||
match(Set dst (RotateRightV src shift));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vrotateconstant $dst, $src, $shift" %}
|
||||
ins_encode %{
|
||||
int opc = this->ideal_Opcode();
|
||||
int raw_shift = checked_cast<int>(opc == Op_RotateLeftV ?
|
||||
$shift$$constant : -$shift$$constant);
|
||||
__ neon_vector_rotate($dst$$FloatRegister, get_arrangement(this),
|
||||
$src$$FloatRegister, raw_shift);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl VSHIFT_PREDICATE($1, $2, $3 )
|
||||
dnl VSHIFT_PREDICATE(type, op_name, insn)
|
||||
|
||||
@ -7274,3 +7274,20 @@ void MacroAssembler::fast_unlock(Register obj, Register t1, Register t2, Registe
|
||||
|
||||
bind(unlocked);
|
||||
}
|
||||
|
||||
// Rotate using USHR and SLI instructions (or copy, if rotate count is zero)
|
||||
void MacroAssembler::neon_vector_rotate(FloatRegister dst, SIMD_Arrangement T,
|
||||
FloatRegister src, int shift_amount) {
|
||||
assert(src != dst, "did not expect src and dst to be the same register");
|
||||
|
||||
int esize = BitsPerByte << (T / 2);
|
||||
int lshift = shift_amount & (esize - 1);
|
||||
|
||||
if (lshift == 0) {
|
||||
// T & 1 == 0 => 64-bit arrangements, else 128-bit arrangements
|
||||
orr(dst, (T & 1) == 0 ? T8B : T16B, src, src);
|
||||
} else {
|
||||
ushr(dst, T, src, esize - lshift);
|
||||
sli(dst, T, src, lshift);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1627,6 +1627,10 @@ public:
|
||||
const FloatRegister (&stateVectors)[16], int idx1, int idx2,
|
||||
int idx3, int idx4);
|
||||
|
||||
// Rotate using ORR (for identity) or USHR + SLI.
|
||||
void neon_vector_rotate(FloatRegister dst, SIMD_Arrangement T,
|
||||
FloatRegister src, int shift_amount);
|
||||
|
||||
// Place an ISB after code may have been modified due to a safepoint.
|
||||
void safepoint_isb();
|
||||
|
||||
|
||||
@ -141,8 +141,10 @@
|
||||
}
|
||||
|
||||
// Does the CPU supports vector constant rotate instructions?
|
||||
// NEON supports constant rotates via USHR+SLI (2-instruction sequence).
|
||||
// The shift value will be masked to the element width in the .ad rule.
|
||||
static constexpr bool supports_vector_constant_rotates(int shift) {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Does the CPU supports vector unsigned comparison instructions?
|
||||
|
||||
@ -0,0 +1,450 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.LongVector;
|
||||
import jdk.incubator.vector.ShortVector;
|
||||
import jdk.incubator.vector.VectorOperators;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @summary Test that constant-count vector rotates on AArch64 NEON emit
|
||||
* RotateLeftV / RotateRightV IR nodes instead of decomposing them.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires os.arch == "aarch64" & vm.cpu.features ~= ".*simd.*"
|
||||
* @modules jdk.incubator.vector
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorapi.TestVectorRotateConstantAArch64
|
||||
*/
|
||||
public class TestVectorRotateConstantAArch64 {
|
||||
|
||||
static final VectorSpecies<Long> L_SPECIES_128 = LongVector.SPECIES_128;
|
||||
static final VectorSpecies<Integer> I_SPECIES_128 = IntVector.SPECIES_128;
|
||||
static final VectorSpecies<Short> S_SPECIES_128 = ShortVector.SPECIES_128;
|
||||
static final VectorSpecies<Byte> B_SPECIES_128 = ByteVector.SPECIES_128;
|
||||
|
||||
static final int SIZE = 256;
|
||||
static final int INT_INCR = I_SPECIES_128.length();
|
||||
static final int BYTE_INCR = B_SPECIES_128.length();
|
||||
static final int LONG_INCR = L_SPECIES_128.length();
|
||||
static final int SHORT_INCR = S_SPECIES_128.length();
|
||||
static final int INT_BOUND = I_SPECIES_128.loopBound(SIZE);
|
||||
static final int BYTE_BOUND = B_SPECIES_128.loopBound(SIZE);
|
||||
static final int LONG_BOUND = L_SPECIES_128.loopBound(SIZE);
|
||||
static final int SHORT_BOUND = S_SPECIES_128.loopBound(SIZE);
|
||||
|
||||
static int[] iinp = new int[SIZE];
|
||||
static int[] iout = new int[SIZE];
|
||||
static byte[] binp = new byte[SIZE];
|
||||
static byte[] bout = new byte[SIZE];
|
||||
static long[] linp = new long[SIZE];
|
||||
static long[] lout = new long[SIZE];
|
||||
static short[] sinp = new short[SIZE];
|
||||
static short[] sout = new short[SIZE];
|
||||
|
||||
static {
|
||||
Random r = new Random(42);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
iinp[i] = r.nextInt();
|
||||
linp[i] = r.nextLong();
|
||||
binp[i] = (byte) r.nextInt();
|
||||
sinp[i] = (short) r.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftInt_shift0() {
|
||||
for (int i = 0; i < INT_BOUND; i += INT_INCR) {
|
||||
IntVector.fromArray(I_SPECIES_128, iinp, i)
|
||||
.lanewise(VectorOperators.ROL, 0)
|
||||
.intoArray(iout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftInt_shift31() {
|
||||
for (int i = 0; i < INT_BOUND; i += INT_INCR) {
|
||||
IntVector.fromArray(I_SPECIES_128, iinp, i)
|
||||
.lanewise(VectorOperators.ROL, 31)
|
||||
.intoArray(iout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftInt_shift37() {
|
||||
for (int i = 0; i < INT_BOUND; i += INT_INCR) {
|
||||
IntVector.fromArray(I_SPECIES_128, iinp, i)
|
||||
.lanewise(VectorOperators.ROL, 37)
|
||||
.intoArray(iout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightInt_shift13() {
|
||||
for (int i = 0; i < INT_BOUND; i += INT_INCR) {
|
||||
IntVector.fromArray(I_SPECIES_128, iinp, i)
|
||||
.lanewise(VectorOperators.ROR, 13)
|
||||
.intoArray(iout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightInt_shift31() {
|
||||
for (int i = 0; i < INT_BOUND; i += INT_INCR) {
|
||||
IntVector.fromArray(I_SPECIES_128, iinp, i)
|
||||
.lanewise(VectorOperators.ROR, 31)
|
||||
.intoArray(iout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftLong_shift0() {
|
||||
for (int i = 0; i < LONG_BOUND; i += LONG_INCR) {
|
||||
LongVector.fromArray(L_SPECIES_128, linp, i)
|
||||
.lanewise(VectorOperators.ROL, 0)
|
||||
.intoArray(lout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftLong_shift63() {
|
||||
for (int i = 0; i < LONG_BOUND; i += LONG_INCR) {
|
||||
LongVector.fromArray(L_SPECIES_128, linp, i)
|
||||
.lanewise(VectorOperators.ROL, 63)
|
||||
.intoArray(lout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftLong_shift67() {
|
||||
for (int i = 0; i < LONG_BOUND; i += LONG_INCR) {
|
||||
LongVector.fromArray(L_SPECIES_128, linp, i)
|
||||
.lanewise(VectorOperators.ROL, 67)
|
||||
.intoArray(lout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightLong_shift13() {
|
||||
for (int i = 0; i < LONG_BOUND; i += LONG_INCR) {
|
||||
LongVector.fromArray(L_SPECIES_128, linp, i)
|
||||
.lanewise(VectorOperators.ROR, 13)
|
||||
.intoArray(lout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightLong_shift63() {
|
||||
for (int i = 0; i < LONG_BOUND; i += LONG_INCR) {
|
||||
LongVector.fromArray(L_SPECIES_128, linp, i)
|
||||
.lanewise(VectorOperators.ROR, 63)
|
||||
.intoArray(lout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftShort_shift0() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROL, 0)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftShort_shift7() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROL, 7)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftShort_shift15() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROL, 15)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftShort_shift16() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROL, 16)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftShort_shift19() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROL, 19)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightShort_shift5() {
|
||||
for (int i = 0; i < SHORT_BOUND; i += SHORT_INCR) {
|
||||
ShortVector.fromArray(S_SPECIES_128, sinp, i)
|
||||
.lanewise(VectorOperators.ROR, 5)
|
||||
.intoArray(sout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftByte_shift0() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROL, 0)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftByte_shift4() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROL, 4)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftByte_shift7() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROL, 7)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftByte_shift8() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROL, 8)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_LEFT_V, "> 0"})
|
||||
public static void testRotateLeftByte_shift11() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROL, 11)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ROTATE_RIGHT_V, "> 0"})
|
||||
public static void testRotateRightByte_shift3() {
|
||||
for (int i = 0; i < BYTE_BOUND; i += BYTE_INCR) {
|
||||
ByteVector.fromArray(B_SPECIES_128, binp, i)
|
||||
.lanewise(VectorOperators.ROR, 3)
|
||||
.intoArray(bout, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {
|
||||
// Int tests
|
||||
"testRotateLeftInt_shift0", "testRotateLeftInt_shift31",
|
||||
"testRotateLeftInt_shift37", "testRotateRightInt_shift13",
|
||||
"testRotateRightInt_shift31",
|
||||
|
||||
// Long tests
|
||||
"testRotateLeftLong_shift0", "testRotateLeftLong_shift63",
|
||||
"testRotateLeftLong_shift67", "testRotateRightLong_shift13",
|
||||
"testRotateRightLong_shift63",
|
||||
|
||||
// Short tests
|
||||
"testRotateLeftShort_shift0", "testRotateLeftShort_shift7",
|
||||
"testRotateLeftShort_shift15", "testRotateLeftShort_shift16",
|
||||
"testRotateLeftShort_shift19", "testRotateRightShort_shift5",
|
||||
|
||||
// Byte tests
|
||||
"testRotateLeftByte_shift0", "testRotateLeftByte_shift4",
|
||||
"testRotateLeftByte_shift7", "testRotateLeftByte_shift8",
|
||||
"testRotateLeftByte_shift11", "testRotateRightByte_shift3",
|
||||
})
|
||||
public void verifyAllRotates() {
|
||||
testRotateLeftInt_shift0();
|
||||
verifyInt(iout, iinp, 0, true);
|
||||
|
||||
testRotateLeftInt_shift31();
|
||||
verifyInt(iout, iinp, 31, true);
|
||||
|
||||
testRotateLeftInt_shift37();
|
||||
verifyInt(iout, iinp, 37, true);
|
||||
|
||||
testRotateRightInt_shift13();
|
||||
verifyInt(iout, iinp, 13, false);
|
||||
|
||||
testRotateRightInt_shift31();
|
||||
verifyInt(iout, iinp, 31, false);
|
||||
|
||||
testRotateLeftLong_shift0();
|
||||
verifyLong(lout, linp, 0, true);
|
||||
|
||||
testRotateLeftLong_shift63();
|
||||
verifyLong(lout, linp, 63, true);
|
||||
|
||||
testRotateLeftLong_shift67();
|
||||
verifyLong(lout, linp, 67, true);
|
||||
|
||||
testRotateRightLong_shift13();
|
||||
verifyLong(lout, linp, 13, false);
|
||||
|
||||
testRotateRightLong_shift63();
|
||||
verifyLong(lout, linp, 63, false);
|
||||
|
||||
testRotateLeftShort_shift0();
|
||||
verifyShort(sout, sinp, 0, true);
|
||||
|
||||
testRotateLeftShort_shift7();
|
||||
verifyShort(sout, sinp, 7, true);
|
||||
|
||||
testRotateLeftShort_shift15();
|
||||
verifyShort(sout, sinp, 15, true);
|
||||
|
||||
testRotateLeftShort_shift16();
|
||||
verifyShort(sout, sinp, 16, true);
|
||||
|
||||
testRotateLeftShort_shift19();
|
||||
verifyShort(sout, sinp, 19, true);
|
||||
|
||||
testRotateRightShort_shift5();
|
||||
verifyShort(sout, sinp, 5, false);
|
||||
|
||||
testRotateLeftByte_shift0();
|
||||
verifyByte(bout, binp, 0, true);
|
||||
|
||||
testRotateLeftByte_shift4();
|
||||
verifyByte(bout, binp, 4, true);
|
||||
|
||||
testRotateLeftByte_shift7();
|
||||
verifyByte(bout, binp, 7, true);
|
||||
|
||||
testRotateLeftByte_shift8();
|
||||
verifyByte(bout, binp, 8, true);
|
||||
|
||||
testRotateLeftByte_shift11();
|
||||
verifyByte(bout, binp, 11, true);
|
||||
|
||||
testRotateRightByte_shift3();
|
||||
verifyByte(bout, binp, 3, false);
|
||||
}
|
||||
|
||||
static void verifyInt(int[] dst, int[] src, int shift, boolean left) {
|
||||
int bound = I_SPECIES_128.loopBound(src.length);
|
||||
for (int i = 0; i < bound; i++) {
|
||||
int expected = left ? Integer.rotateLeft(src[i], shift)
|
||||
: Integer.rotateRight(src[i], shift);
|
||||
Asserts.assertEquals(dst[i], expected,
|
||||
"int rotate" + (left ? "Left" : "Right") + " failed at index " + i +
|
||||
": src=" + Integer.toHexString(src[i]) + " shift=" + shift);
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyLong(long[] dst, long[] src, int shift, boolean left) {
|
||||
int bound = L_SPECIES_128.loopBound(src.length);
|
||||
for (int i = 0; i < bound; i++) {
|
||||
long expected = left ? Long.rotateLeft(src[i], shift)
|
||||
: Long.rotateRight(src[i], shift);
|
||||
Asserts.assertEquals(dst[i], expected,
|
||||
"long rotate" + (left ? "Left" : "Right") + " failed at index " + i +
|
||||
": src=" + Long.toHexString(src[i]) + " shift=" + shift);
|
||||
}
|
||||
}
|
||||
|
||||
static short rotateShort(short val, int shift, boolean left) {
|
||||
int n = left ? (shift & 15) : ((-shift) & 15);
|
||||
int unsigned = val & 0xFFFF;
|
||||
return (short) ((unsigned << n) | (unsigned >>> (16 - n)));
|
||||
}
|
||||
|
||||
static void verifyShort(short[] dst, short[] src, int shift, boolean left) {
|
||||
int bound = S_SPECIES_128.loopBound(src.length);
|
||||
for (int i = 0; i < bound; i++) {
|
||||
short expected = rotateShort(src[i], shift, left);
|
||||
Asserts.assertEquals(dst[i], expected,
|
||||
"short rotate" + (left ? "Left" : "Right") + " failed at index " + i +
|
||||
": src=" + Integer.toHexString(src[i] & 0xFFFF) + " shift=" + shift);
|
||||
}
|
||||
}
|
||||
|
||||
static byte rotateByte(byte val, int shift, boolean left) {
|
||||
int n = left ? (shift & 7) : ((-shift) & 7);
|
||||
int unsigned = val & 0xFF;
|
||||
return (byte) ((unsigned << n) | (unsigned >>> (8 - n)));
|
||||
}
|
||||
|
||||
static void verifyByte(byte[] dst, byte[] src, int shift, boolean left) {
|
||||
int bound = B_SPECIES_128.loopBound(src.length);
|
||||
for (int i = 0; i < bound; i++) {
|
||||
byte expected = rotateByte(src[i], shift, left);
|
||||
Asserts.assertEquals(dst[i], expected,
|
||||
"byte rotate" + (left ? "Left" : "Right") + " failed at index " + i +
|
||||
": src=" + Integer.toHexString(src[i] & 0xFF) + " shift=" + shift);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -80,6 +80,9 @@ public class ArrayShiftOpTest extends VectorizationTestRunner {
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(applyIfCPUFeatureOr = {"avx512f", "true", "zvbb", "true"},
|
||||
counts = {IRNode.ROTATE_RIGHT_V, ">0"})
|
||||
@IR(applyIfCPUFeature = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", "<= 16"},
|
||||
counts = {IRNode.ROTATE_RIGHT_V, ">0"})
|
||||
public int[] intCombinedRotateShift() {
|
||||
int[] res = new int[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
@ -109,6 +112,9 @@ public class ArrayShiftOpTest extends VectorizationTestRunner {
|
||||
counts = {IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(applyIfCPUFeatureOr = {"avx512f", "true", "zvbb", "true"},
|
||||
counts = {IRNode.ROTATE_RIGHT_V, ">0"})
|
||||
@IR(applyIfCPUFeature = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", "<= 16"},
|
||||
counts = {IRNode.ROTATE_RIGHT_V, ">0"})
|
||||
public long[] longCombinedRotateShift() {
|
||||
long[] res = new long[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user