mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
more IR rules
This commit is contained in:
parent
96c77ae8b1
commit
fdbec78a73
@ -89,8 +89,6 @@ public class TestVectorAlgorithms {
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework framework = new TestFramework();
|
||||
// TODO: run with and without SuperWord, and also some intrinsics should be disabled in a run.
|
||||
// make sure that all those flags are also mentioned in the JMH.
|
||||
framework.addFlags("--add-modules=jdk.incubator.vector", "-XX:CompileCommand=inline,*VectorAlgorithmsImpl::*");
|
||||
switch (args[0]) {
|
||||
case "vanilla" -> { /* no extra flags */ }
|
||||
@ -278,8 +276,10 @@ public class TestVectorAlgorithms {
|
||||
@Test
|
||||
@IR(counts = {IRNode.POPULATE_INDEX, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||||
applyIfCPUFeatureOr = {"avx2", "true", "sve", "true"},
|
||||
applyIf = {"UseSuperWord", "true"})
|
||||
// Note: the Vector API example below can also vectorize for AVX,
|
||||
// because it does not use a PopulateIndex.
|
||||
public Object iotaI_loop(int[] r) {
|
||||
return VectorAlgorithmsImpl.iotaI_loop(r);
|
||||
}
|
||||
@ -385,7 +385,8 @@ public class TestVectorAlgorithms {
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.REARRANGE_VI, "> 0",
|
||||
IRNode.AND_VI, "> 0",
|
||||
IRNode.ADD_VI, "> 0"},
|
||||
IRNode.ADD_VI, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) {
|
||||
@ -400,6 +401,12 @@ public class TestVectorAlgorithms {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.VECTOR_MASK_CMP, "> 0",
|
||||
IRNode.VECTOR_BLEND_I, "> 0",
|
||||
IRNode.MIN_REDUCTION_V, "> 0",
|
||||
IRNode.ADD_VI, "> 0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
public int findMinIndexI_VectorAPI(int[] a) {
|
||||
return VectorAlgorithmsImpl.findMinIndexI_VectorAPI(a);
|
||||
}
|
||||
@ -412,6 +419,10 @@ public class TestVectorAlgorithms {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.VECTOR_MASK_CMP, "> 0",
|
||||
IRNode.VECTOR_TEST, "> 0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
public int findI_VectorAPI(int[] a, int e) {
|
||||
return VectorAlgorithmsImpl.findI_VectorAPI(a, e);
|
||||
}
|
||||
@ -425,6 +436,11 @@ public class TestVectorAlgorithms {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.REARRANGE_VI, "> 0",
|
||||
IRNode.AND_VI, "> 0",
|
||||
IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
public Object reverseI_VectorAPI(int[] a, int[] r) {
|
||||
return VectorAlgorithmsImpl.reverseI_VectorAPI(a, r);
|
||||
}
|
||||
@ -437,6 +453,12 @@ public class TestVectorAlgorithms {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.VECTOR_MASK_CMP, "> 0",
|
||||
IRNode.VECTOR_TEST, "> 0",
|
||||
IRNode.VECTOR_LONG_TO_MASK, "> 0",
|
||||
IRNode.STORE_VECTOR_MASKED, "> 0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
public Object filterI_VectorAPI(int[] a, int[] r, int threshold) {
|
||||
return VectorAlgorithmsImpl.filterI_VectorAPI(a, r, threshold);
|
||||
}
|
||||
@ -449,6 +471,14 @@ public class TestVectorAlgorithms {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.VECTOR_MASK_CMP, "> 0",
|
||||
IRNode.VECTOR_TEST, "> 0",
|
||||
IRNode.LOAD_VECTOR_GATHER_MASKED, "> 0",
|
||||
IRNode.OR_V_MASK, "> 0",
|
||||
IRNode.ADD_VI, "> 0",
|
||||
IRNode.ADD_REDUCTION_VI, "> 0"},
|
||||
applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"})
|
||||
public int reduceAddIFieldsX4_VectorAPI(int[] oops, int[] mem) {
|
||||
return VectorAlgorithmsImpl.reduceAddIFieldsX4_VectorAPI(oops, mem);
|
||||
}
|
||||
|
||||
@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl {
|
||||
|
||||
public static int findMinIndexI_VectorAPI(int[] a) {
|
||||
// Main approach: have partial results in mins and idxs.
|
||||
var mins = IntVector.broadcast(SPECIES_I512, a[0]);
|
||||
var idxs = IntVector.broadcast(SPECIES_I512, 0);
|
||||
var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
|
||||
var mins = IntVector.broadcast(SPECIES_I, a[0]);
|
||||
var idxs = IntVector.broadcast(SPECIES_I, 0);
|
||||
var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1);
|
||||
int i = 0;
|
||||
for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
|
||||
IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
|
||||
for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) {
|
||||
IntVector v = IntVector.fromArray(SPECIES_I, a, i);
|
||||
var mask = v.compare(VectorOperators.LT, mins);
|
||||
mins = mins.blend(v, mask);
|
||||
idxs = idxs.blend(iota, mask);
|
||||
iota = iota.add(SPECIES_I512.length());
|
||||
iota = iota.add(SPECIES_I.length());
|
||||
}
|
||||
// Reduce the vectors down
|
||||
int min = mins.reduceLanes(VectorOperators.MIN);
|
||||
|
||||
@ -41,6 +41,8 @@ import org.openjdk.jmh.annotations.*;
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:AutoVectorizationOverrideProfitability=0
|
||||
* - Smaller vector size:
|
||||
* -XX:MaxVectorSize=16
|
||||
* - Disable fill loop detection, so that auto vectorization is used instead of the fill intrinsic:
|
||||
* -XX:-OptimizeFill
|
||||
*/
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
|
||||
@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl {
|
||||
|
||||
public static int findMinIndexI_VectorAPI(int[] a) {
|
||||
// Main approach: have partial results in mins and idxs.
|
||||
var mins = IntVector.broadcast(SPECIES_I512, a[0]);
|
||||
var idxs = IntVector.broadcast(SPECIES_I512, 0);
|
||||
var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
|
||||
var mins = IntVector.broadcast(SPECIES_I, a[0]);
|
||||
var idxs = IntVector.broadcast(SPECIES_I, 0);
|
||||
var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1);
|
||||
int i = 0;
|
||||
for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
|
||||
IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
|
||||
for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) {
|
||||
IntVector v = IntVector.fromArray(SPECIES_I, a, i);
|
||||
var mask = v.compare(VectorOperators.LT, mins);
|
||||
mins = mins.blend(v, mask);
|
||||
idxs = idxs.blend(iota, mask);
|
||||
iota = iota.add(SPECIES_I512.length());
|
||||
iota = iota.add(SPECIES_I.length());
|
||||
}
|
||||
// Reduce the vectors down
|
||||
int min = mins.reduceLanes(VectorOperators.MIN);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user