diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 3c8defe62d9..38a28a6ec49 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -5442,6 +5442,13 @@ void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) { emit_int16(0x23, (0xC0 | encode)); } +void Assembler::pmovzxwd(XMMRegister dst, XMMRegister src) { /* Emits the register-to-register form of PMOVZXWD; by its mnemonic this widens packed words to doublewords with zero extension, the unsigned counterpart of pmovsxwd (opcode 0x23) just above. */ + assert(VM_Version::supports_sse4_1(), ""); /* requires SSE4.1 */ + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); /* 66 prefix, 0F 38 opcode map; xnoreg is passed as the middle register operand */ + emit_int16(0x33, (0xC0 | encode)); /* opcode byte 0x33, then 0xC0 | encode as the register-direct operand byte */ +} + void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 97854f712cf..57a5e25d7a6 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1965,6 +1965,7 @@ private: void pmovsxbq(XMMRegister dst, XMMRegister src); void pmovsxbw(XMMRegister dst, XMMRegister src); void pmovsxwd(XMMRegister dst, XMMRegister src); + void pmovzxwd(XMMRegister dst, XMMRegister src); /* register-to-register PMOVZXWD; defined in assembler_x86.cpp */ void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len); void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len); void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index a3ccc081b6b..5b5fb02967c 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1729,6 +1729,24 @@ void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegis default: assert(false, "wrong type"); } break; + case Op_UMinReductionV: /* unsigned-min reduction step, dispatched on the element type */ + switch (typ) { + case T_BYTE: vpminub(dst, dst, src, 
Assembler::AVX_128bit); break; + case T_SHORT: vpminuw(dst, dst, src, Assembler::AVX_128bit); break; + case T_INT: vpminud(dst, dst, src, Assembler::AVX_128bit); break; + case T_LONG: evpminuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break; + default: assert(false, "wrong type"); + } + break; + case Op_UMaxReductionV: /* unsigned-max counterpart of the UMin case: dst is combined with src at 128-bit length */ + switch (typ) { + case T_BYTE: vpmaxub(dst, dst, src, Assembler::AVX_128bit); break; + case T_SHORT: vpmaxuw(dst, dst, src, Assembler::AVX_128bit); break; + case T_INT: vpmaxud(dst, dst, src, Assembler::AVX_128bit); break; + case T_LONG: evpmaxuq(dst, k0, dst, src, true, Assembler::AVX_128bit); break; + default: assert(false, "wrong type"); + } + break; case Op_AddReductionVF: addss(dst, src); break; case Op_AddReductionVD: addsd(dst, src); break; case Op_AddReductionVI: @@ -1792,6 +1810,24 @@ void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegis default: assert(false, "wrong type"); } break; + case Op_UMinReductionV: /* three-operand form: dst receives the unsigned min of src1 and src2 at the caller-supplied vector_len */ + switch (typ) { + case T_BYTE: vpminub(dst, src1, src2, vector_len); break; + case T_SHORT: vpminuw(dst, src1, src2, vector_len); break; + case T_INT: vpminud(dst, src1, src2, vector_len); break; + case T_LONG: evpminuq(dst, k0, src1, src2, true, vector_len); break; + default: assert(false, "wrong type"); + } + break; + case Op_UMaxReductionV: /* same per-type dispatch for unsigned max */ + switch (typ) { + case T_BYTE: vpmaxub(dst, src1, src2, vector_len); break; + case T_SHORT: vpmaxuw(dst, src1, src2, vector_len); break; + case T_INT: vpmaxud(dst, src1, src2, vector_len); break; + case T_LONG: evpmaxuq(dst, k0, src1, src2, true, vector_len); break; + default: assert(false, "wrong type"); + } + break; case Op_AddReductionVI: switch (typ) { case T_BYTE: vpaddb(dst, src1, src2, vector_len); break; @@ -2058,7 +2094,11 @@ void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMReg psrldq(vtmp2, 1); reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); movdl(vtmp2, src1); - pmovsxbd(vtmp1, vtmp1); + if (opcode == Op_UMinReductionV 
|| opcode == Op_UMaxReductionV) { /* Unsigned reductions widen the reduced byte with pmovzxbd (zero extension by mnemonic) rather than pmovsxbd before the final T_INT step against src1. NOTE(review): src1 arrives via movdl unchanged; if it can be a sign-extended negative byte that is not the reduction identity, the 32-bit unsigned compare against the zero-extended lane would disagree with byte-wise unsigned ordering - confirm what callers pass. */ + pmovzxbd(vtmp1, vtmp1); + } else { + pmovsxbd(vtmp1, vtmp1); + } reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); pextrb(dst, vtmp1, 0x0); movsbl(dst, dst); @@ -2135,7 +2175,11 @@ void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMReg reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2); } movdl(vtmp2, src1); - pmovsxwd(vtmp1, vtmp1); + if (opcode == Op_UMinReductionV || opcode == Op_UMaxReductionV) { /* Same choice for short lanes: pmovzxwd for unsigned min/max, pmovsxwd otherwise. NOTE(review): the src1 extension question raised for the byte path applies here as well - confirm. */ + pmovzxwd(vtmp1, vtmp1); + } else { + pmovsxwd(vtmp1, vtmp1); + } reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); pextrw(dst, vtmp1, 0x0); movswl(dst, dst); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 0ffa4c2031c..b155355c827 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -3341,6 +3341,18 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { return false; } break; + case Op_UMinReductionV: + case Op_UMaxReductionV: /* platform gate shared by both unsigned min/max reductions */ + if (UseAVX == 0) { /* rejected when AVX is not in use */ + return false; + } + if (bt == T_LONG && !VM_Version::supports_avx512vl()) { /* long lanes are rejected without AVX512VL; the T_LONG cases in reduce_operation_128/256 use evpminuq/evpmaxuq */ + return false; + } + if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) { /* 512-bit vectors under UseAVX > 2 are likewise rejected without AVX512VL */ + return false; + } + break; case Op_MaxV: case Op_MinV: if (UseSSE < 4 && is_integral_type(bt)) { @@ -19371,6 +19383,8 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ @@ -19392,6 +19406,8 @@ instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 
src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ @@ -19411,6 +19427,8 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ @@ -19639,6 +19657,8 @@ instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ @@ -19657,6 +19677,8 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ @@ -19678,6 +19700,8 @@ instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm match(Set dst (XorReductionV src1 src2)); match(Set dst (MinReductionV src1 src2)); match(Set dst (MaxReductionV src1 src2)); + match(Set dst (UMinReductionV src1 src2)); + match(Set dst (UMaxReductionV src1 src2)); effect(TEMP vtmp1, TEMP vtmp2); format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} ins_encode %{ diff 
--git a/test/hotspot/jtreg/compiler/vectorapi/VectorUMinMaxReductionTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorUMinMaxReductionTest.java index 1c8cc34170e..4c753d91afd 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/VectorUMinMaxReductionTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorUMinMaxReductionTest.java @@ -1,4 +1,5 @@ /* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -140,7 +141,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testByteUMin() { byte got = ByteVector.fromArray(B_SPECIES, ba, 0).reduceLanes(VectorOperators.UMIN); verifyByte(B_SPECIES, got, BYTE_UMIN_IDENTITY, VectorMath::minUnsigned, false); @@ -148,7 +149,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testByteUMax() { byte got = ByteVector.fromArray(B_SPECIES, ba, 0).reduceLanes(VectorOperators.UMAX); verifyByte(B_SPECIES, got, BYTE_UMAX_IDENTITY, VectorMath::maxUnsigned, false); @@ -156,7 +157,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testByteUMinMasked() { byte got = ByteVector.fromArray(B_SPECIES, ba, 0) .reduceLanes(VectorOperators.UMIN, @@ -166,7 +167,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void 
testByteUMaxMasked() { byte got = ByteVector.fromArray(B_SPECIES, ba, 0) .reduceLanes(VectorOperators.UMAX, @@ -178,7 +179,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testShortUMin() { short got = ShortVector.fromArray(S_SPECIES, sa, 0).reduceLanes(VectorOperators.UMIN); verifyShort(S_SPECIES, got, SHORT_UMIN_IDENTITY, VectorMath::minUnsigned, false); @@ -186,7 +187,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testShortUMax() { short got = ShortVector.fromArray(S_SPECIES, sa, 0).reduceLanes(VectorOperators.UMAX); verifyShort(S_SPECIES, got, SHORT_UMAX_IDENTITY, VectorMath::maxUnsigned, false); @@ -194,7 +195,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testShortUMinMasked() { short got = ShortVector.fromArray(S_SPECIES, sa, 0) .reduceLanes(VectorOperators.UMIN, @@ -204,7 +205,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testShortUMaxMasked() { short got = ShortVector.fromArray(S_SPECIES, sa, 0) .reduceLanes(VectorOperators.UMAX, @@ -216,7 +217,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testIntUMin() { int got = IntVector.fromArray(I_SPECIES, ia, 0).reduceLanes(VectorOperators.UMIN); verifyInt(I_SPECIES, got, 
INT_UMIN_IDENTITY, VectorMath::minUnsigned, false); @@ -224,7 +225,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testIntUMax() { int got = IntVector.fromArray(I_SPECIES, ia, 0).reduceLanes(VectorOperators.UMAX); verifyInt(I_SPECIES, got, INT_UMAX_IDENTITY, VectorMath::maxUnsigned, false); @@ -232,7 +233,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testIntUMinMasked() { int got = IntVector.fromArray(I_SPECIES, ia, 0) .reduceLanes(VectorOperators.UMIN, @@ -242,7 +243,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}) public static void testIntUMaxMasked() { int got = IntVector.fromArray(I_SPECIES, ia, 0) .reduceLanes(VectorOperators.UMAX, @@ -254,7 +255,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx512vl", "true"}) public static void testLongUMin() { long got = LongVector.fromArray(L_SPECIES, la, 0).reduceLanes(VectorOperators.UMIN); verifyLong(L_SPECIES, got, LONG_UMIN_IDENTITY, VectorMath::minUnsigned, false); @@ -262,7 +263,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx512vl", "true"}) public static void testLongUMax() { long got = LongVector.fromArray(L_SPECIES, la, 0).reduceLanes(VectorOperators.UMAX); verifyLong(L_SPECIES, got, LONG_UMAX_IDENTITY, VectorMath::maxUnsigned, false); @@ -270,7 +271,7 @@ 
public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMIN_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx512vl", "true"}) public static void testLongUMinMasked() { long got = LongVector.fromArray(L_SPECIES, la, 0) .reduceLanes(VectorOperators.UMIN, @@ -280,7 +281,7 @@ public class VectorUMinMaxReductionTest { @Test @IR(counts = {IRNode.UMAX_REDUCTION_V, "= 1"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureOr = {"asimd", "true", "avx512vl", "true"}) public static void testLongUMaxMasked() { long got = LongVector.fromArray(L_SPECIES, la, 0) .reduceLanes(VectorOperators.UMAX,