more IR rules

This commit is contained in:
Emanuel Peter 2025-12-15 13:36:25 +01:00
parent 96c77ae8b1
commit fdbec78a73
4 changed files with 48 additions and 16 deletions

View File

@ -89,8 +89,6 @@ public class TestVectorAlgorithms {
public static void main(String[] args) {
TestFramework framework = new TestFramework();
// TODO: run with and without SuperWord; some intrinsics should also be disabled in a run.
// Make sure that all those flags are also mentioned in the JMH benchmark.
framework.addFlags("--add-modules=jdk.incubator.vector", "-XX:CompileCommand=inline,*VectorAlgorithmsImpl::*");
switch (args[0]) {
case "vanilla" -> { /* no extra flags */ }
@ -278,8 +276,10 @@ public class TestVectorAlgorithms {
// IR test for the loop variant of iotaI: forwards to VectorAlgorithmsImpl.iotaI_loop
// and returns its result.
// The IR rule expects at least one POPULATE_INDEX and one STORE_VECTOR node and is
// only applied when UseSuperWord is true.
// NOTE(review): two applyIfCPUFeatureOr entries are visible in this view
// (sse4.1/asimd and avx2/sve). This looks like the old and the new line of the
// change shown together -- confirm that only one of them exists in the real file.
@Test
@IR(counts = {IRNode.POPULATE_INDEX, "> 0",
IRNode.STORE_VECTOR, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
applyIfCPUFeatureOr = {"avx2", "true", "sve", "true"},
applyIf = {"UseSuperWord", "true"})
// Note: the Vector API example below can also vectorize for AVX,
// because it does not use a PopulateIndex.
public Object iotaI_loop(int[] r) {
return VectorAlgorithmsImpl.iotaI_loop(r);
}
@ -385,7 +385,8 @@ public class TestVectorAlgorithms {
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.REARRANGE_VI, "> 0",
IRNode.AND_VI, "> 0",
IRNode.ADD_VI, "> 0"},
IRNode.ADD_VI, "> 0",
IRNode.STORE_VECTOR, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
applyIf = {"MaxVectorSize", ">=64"})
public Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) {
@ -400,6 +401,12 @@ public class TestVectorAlgorithms {
}
// IR test for the Vector API variant of findMinIndexI: forwards to
// VectorAlgorithmsImpl.findMinIndexI_VectorAPI and returns its result.
// The IR rule expects at least one match for each listed node (LOAD_VECTOR_I,
// VECTOR_MASK_CMP, VECTOR_BLEND_I, MIN_REDUCTION_V, ADD_VI) and is only
// applied when the CPU reports avx or asimd.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.VECTOR_MASK_CMP, "> 0",
IRNode.VECTOR_BLEND_I, "> 0",
IRNode.MIN_REDUCTION_V, "> 0",
IRNode.ADD_VI, "> 0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
public int findMinIndexI_VectorAPI(int[] a) {
return VectorAlgorithmsImpl.findMinIndexI_VectorAPI(a);
}
@ -412,6 +419,10 @@ public class TestVectorAlgorithms {
}
// IR test for the Vector API variant of findI: forwards both arguments to
// VectorAlgorithmsImpl.findI_VectorAPI and returns its result.
// The IR rule expects at least one match for each of LOAD_VECTOR_I,
// VECTOR_MASK_CMP and VECTOR_TEST, and is only applied when the CPU
// reports avx or asimd.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.VECTOR_MASK_CMP, "> 0",
IRNode.VECTOR_TEST, "> 0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
public int findI_VectorAPI(int[] a, int e) {
return VectorAlgorithmsImpl.findI_VectorAPI(a, e);
}
@ -425,6 +436,11 @@ public class TestVectorAlgorithms {
}
// IR test for the Vector API variant of reverseI: forwards both arrays to
// VectorAlgorithmsImpl.reverseI_VectorAPI and returns its result.
// The IR rule expects at least one match for each of LOAD_VECTOR_I,
// REARRANGE_VI, AND_VI and STORE_VECTOR, and is only applied when the CPU
// reports sse4.1 or asimd.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.REARRANGE_VI, "> 0",
IRNode.AND_VI, "> 0",
IRNode.STORE_VECTOR, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
public Object reverseI_VectorAPI(int[] a, int[] r) {
return VectorAlgorithmsImpl.reverseI_VectorAPI(a, r);
}
@ -437,6 +453,12 @@ public class TestVectorAlgorithms {
}
// IR test for the Vector API variant of filterI: forwards all three arguments
// to VectorAlgorithmsImpl.filterI_VectorAPI and returns its result.
// The IR rule expects at least one match for each of LOAD_VECTOR_I,
// VECTOR_MASK_CMP, VECTOR_TEST, VECTOR_LONG_TO_MASK and STORE_VECTOR_MASKED,
// and is only applied when the CPU reports avx or asimd.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.VECTOR_MASK_CMP, "> 0",
IRNode.VECTOR_TEST, "> 0",
IRNode.VECTOR_LONG_TO_MASK, "> 0",
IRNode.STORE_VECTOR_MASKED, "> 0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
public Object filterI_VectorAPI(int[] a, int[] r, int threshold) {
return VectorAlgorithmsImpl.filterI_VectorAPI(a, r, threshold);
}
@ -449,6 +471,14 @@ public class TestVectorAlgorithms {
}
// IR test for the Vector API variant of reduceAddIFieldsX4: forwards both
// arrays to VectorAlgorithmsImpl.reduceAddIFieldsX4_VectorAPI and returns
// its result.
// The IR rule expects at least one match for each of LOAD_VECTOR_I,
// VECTOR_MASK_CMP, VECTOR_TEST, LOAD_VECTOR_GATHER_MASKED, OR_V_MASK, ADD_VI
// and ADD_REDUCTION_VI, and is only applied when the CPU reports avx512 or sve.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.VECTOR_MASK_CMP, "> 0",
IRNode.VECTOR_TEST, "> 0",
IRNode.LOAD_VECTOR_GATHER_MASKED, "> 0",
IRNode.OR_V_MASK, "> 0",
IRNode.ADD_VI, "> 0",
IRNode.ADD_REDUCTION_VI, "> 0"},
applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"})
public int reduceAddIFieldsX4_VectorAPI(int[] oops, int[] mem) {
return VectorAlgorithmsImpl.reduceAddIFieldsX4_VectorAPI(oops, mem);
}

View File

@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl {
public static int findMinIndexI_VectorAPI(int[] a) {
// Main approach: have partial results in mins and idxs.
var mins = IntVector.broadcast(SPECIES_I512, a[0]);
var idxs = IntVector.broadcast(SPECIES_I512, 0);
var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
var mins = IntVector.broadcast(SPECIES_I, a[0]);
var idxs = IntVector.broadcast(SPECIES_I, 0);
var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1);
int i = 0;
for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) {
IntVector v = IntVector.fromArray(SPECIES_I, a, i);
var mask = v.compare(VectorOperators.LT, mins);
mins = mins.blend(v, mask);
idxs = idxs.blend(iota, mask);
iota = iota.add(SPECIES_I512.length());
iota = iota.add(SPECIES_I.length());
}
// Reduce the vectors down
int min = mins.reduceLanes(VectorOperators.MIN);

View File

@ -41,6 +41,8 @@ import org.openjdk.jmh.annotations.*;
* -XX:+UnlockDiagnosticVMOptions -XX:AutoVectorizationOverrideProfitability=0
* - Smaller vector size:
* -XX:MaxVectorSize=16
* - Disable fill loop detection, so that auto vectorization is used instead of the intrinsic:
* -XX:-OptimizeFill
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)

View File

@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl {
public static int findMinIndexI_VectorAPI(int[] a) {
// Main approach: have partial results in mins and idxs.
var mins = IntVector.broadcast(SPECIES_I512, a[0]);
var idxs = IntVector.broadcast(SPECIES_I512, 0);
var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
var mins = IntVector.broadcast(SPECIES_I, a[0]);
var idxs = IntVector.broadcast(SPECIES_I, 0);
var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1);
int i = 0;
for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) {
IntVector v = IntVector.fromArray(SPECIES_I, a, i);
var mask = v.compare(VectorOperators.LT, mins);
mins = mins.blend(v, mask);
idxs = idxs.blend(iota, mask);
iota = iota.add(SPECIES_I512.length());
iota = iota.add(SPECIES_I.length());
}
// Reduce the vectors down
int min = mins.reduceLanes(VectorOperators.MIN);