mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8286972: Support the new loop induction variable related PopulateIndex IR node on x86
Reviewed-by: kvn, jbhateja
This commit is contained in:
parent
8122466fbb
commit
5d8d6da36a
@ -2274,6 +2274,84 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a packed vector add (dst = src1 + src2) whose instruction is chosen
// by the element type:
//   T_BYTE -> vpaddb, T_SHORT -> vpaddw, T_INT -> vpaddd, T_LONG -> vpaddq,
//   T_FLOAT -> vaddps, T_DOUBLE -> vaddpd.
// vlen_enc is handed through unchanged to the selected instruction.
// Any other element type only trips an assert; nothing is emitted for it.
void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
  assert(UseAVX >= 2, "required");
#ifdef ASSERT
  // Debug-only sanity check: byte/short adds on a CPU that does not report
  // AVX512BW must not request the 512-bit encoding and must only use
  // registers whose encoding is below 16.
  const bool sub_word_elem = (elem_bt == T_BYTE) || (elem_bt == T_SHORT);
  if (sub_word_elem && !VM_Version::supports_avx512bw()) {
    assert(vlen_enc != Assembler::AVX_512bit, "required");
    assert((dst->encoding() < 16) && (src1->encoding() < 16) && (src2->encoding() < 16),
           "XMM register should be 0-15");
  }
#endif // ASSERT
  switch (elem_bt) {
    case T_BYTE:
      vpaddb(dst, src1, src2, vlen_enc);
      break;
    case T_SHORT:
      vpaddw(dst, src1, src2, vlen_enc);
      break;
    case T_INT:
      vpaddd(dst, src1, src2, vlen_enc);
      break;
    case T_LONG:
      vpaddq(dst, src1, src2, vlen_enc);
      break;
    case T_FLOAT:
      vaddps(dst, src1, src2, vlen_enc);
      break;
    case T_DOUBLE:
      vaddpd(dst, src1, src2, vlen_enc);
      break;
    default:
      assert(false, "%s", type2name(elem_bt));
      break;
  }
}
|
||||
|
||||
#ifdef _LP64
|
||||
// Replicates the value held in the general-purpose register src into dst,
// with the element width selected by elem_bt.
//
// Two code shapes are produced:
//  - If UseAVX > 2 and VM_Version reports the extensions this request needs
//    (AVX512BW for byte/short elements, AVX512VL for anything other than the
//    512-bit encoding), a single evpbroadcast{b,w,d,q} taking the
//    general-purpose register directly is emitted.
//  - Otherwise the value is first moved into dst (movdl, or movdq for
//    long/double) and then broadcast from dst onto itself. This path asserts
//    that the 512-bit encoding is not requested and that dst's encoding is
//    below 16.
// An unsupported element type only trips an assert; nothing is emitted.
void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc) {
  assert(UseAVX >= 2, "required");

  const bool sub_word_elem = (elem_bt == T_BYTE) || (elem_bt == T_SHORT);
  const bool below_512bit  = (vlen_enc != Assembler::AVX_512bit);
  const bool gpr_broadcast_ok =
      (UseAVX > 2) &&
      (!sub_word_elem || VM_Version::supports_avx512bw()) &&
      (!below_512bit  || VM_Version::supports_avx512vl());

  if (gpr_broadcast_ok) {
    switch (elem_bt) {
      case T_BYTE:
        evpbroadcastb(dst, src, vlen_enc);
        break;
      case T_SHORT:
        evpbroadcastw(dst, src, vlen_enc);
        break;
      case T_INT:
      case T_FLOAT:
        evpbroadcastd(dst, src, vlen_enc);
        break;
      case T_LONG:
      case T_DOUBLE:
        evpbroadcastq(dst, src, vlen_enc);
        break;
      default:
        assert(false, "%s", type2name(elem_bt));
        break;
    }
    return;
  }

  // Fallback: stage the scalar in dst, then broadcast dst onto itself.
  assert(vlen_enc != Assembler::AVX_512bit, "required");
  assert((dst->encoding() < 16),"XMM register should be 0-15");
  switch (elem_bt) {
    case T_BYTE:
      movdl(dst, src);
      vpbroadcastb(dst, dst, vlen_enc);
      break;
    case T_SHORT:
      movdl(dst, src);
      vpbroadcastw(dst, dst, vlen_enc);
      break;
    case T_INT:
      movdl(dst, src);
      vpbroadcastd(dst, dst, vlen_enc);
      break;
    case T_FLOAT:
      movdl(dst, src);
      vbroadcastss(dst, dst, vlen_enc);
      break;
    case T_LONG:
      movdq(dst, src);
      vpbroadcastq(dst, dst, vlen_enc);
      break;
    case T_DOUBLE:
      movdq(dst, src);
      vbroadcastsd(dst, dst, vlen_enc);
      break;
    default:
      assert(false, "%s", type2name(elem_bt));
      break;
  }
}
|
||||
#endif
|
||||
|
||||
// Converts a vector of bytes in src into a vector of to_elem_bt elements in dst.
//   T_SHORT / T_INT / T_LONG : a single sign-extending move (vpmovsxb{w,d,q}).
//   T_FLOAT                  : sign-extend to int, then vcvtdq2ps in place.
//   T_DOUBLE                 : sign-extend to int using a narrower encoding,
//                              then vcvtdq2pd at the requested encoding.
// Any other target type only trips an assert; nothing is emitted for it.
void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc) {
  switch (to_elem_bt) {
    case T_SHORT:
      vpmovsxbw(dst, src, vlen_enc);
      return;
    case T_INT:
      vpmovsxbd(dst, src, vlen_enc);
      return;
    case T_LONG:
      vpmovsxbq(dst, src, vlen_enc);
      return;
    case T_FLOAT:
      vpmovsxbd(dst, src, vlen_enc);
      vcvtdq2ps(dst, dst, vlen_enc);
      return;
    case T_DOUBLE: {
      // The intermediate int vector uses the 256-bit encoding when the result
      // is 512-bit, and the 128-bit encoding in every other case.
      int int_vlen_enc = Assembler::AVX_128bit;
      if (vlen_enc == Assembler::AVX_512bit) {
        int_vlen_enc = Assembler::AVX_256bit;
      }
      vpmovsxbd(dst, src, int_vlen_enc);
      vcvtdq2pd(dst, dst, vlen_enc);
      return;
    }
    default:
      assert(false, "%s", type2name(to_elem_bt));
  }
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------
|
||||
|
||||
// IndexOf for constant substrings with size >= 8 chars
|
||||
|
||||
@ -132,6 +132,13 @@ public:
|
||||
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
|
||||
|
||||
// Convert B2X
|
||||
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
|
||||
#ifdef _LP64
|
||||
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
|
||||
#endif
|
||||
void vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
|
||||
|
||||
// blend
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
|
||||
|
||||
@ -1468,6 +1468,11 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_PopulateIndex:
|
||||
if (!is_LP64 || (UseAVX < 2)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_RoundVF:
|
||||
if (UseAVX < 2) { // enabled for AVX2 only
|
||||
return false;
|
||||
@ -1811,6 +1816,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
return false; // Implementation limitation
|
||||
}
|
||||
break;
|
||||
case Op_PopulateIndex:
|
||||
if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
|
||||
return false;
|
||||
}
|
||||
case Op_VectorCastB2X:
|
||||
case Op_VectorCastS2X:
|
||||
case Op_VectorCastI2X:
|
||||
@ -6918,28 +6927,7 @@ instruct vcastBtoX(vec dst, vec src) %{
|
||||
|
||||
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
switch (to_elem_bt) {
|
||||
case T_SHORT:
|
||||
__ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
break;
|
||||
case T_INT:
|
||||
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
break;
|
||||
case T_FLOAT:
|
||||
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
__ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
break;
|
||||
case T_LONG:
|
||||
__ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
break;
|
||||
case T_DOUBLE: {
|
||||
int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
|
||||
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
|
||||
__ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
break;
|
||||
}
|
||||
default: assert(false, "%s", type2name(to_elem_bt));
|
||||
}
|
||||
__ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -8272,6 +8260,45 @@ instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
#ifdef _LP64
|
||||
// PopulateIndex with an int start value (src1) and a constant stride of 1
// (src2 is restricted to immI_1 and re-checked by the assert below).
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp, rRegP scratch) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    BasicType bt        = Matcher::vector_element_basic_type(this);
    int       enc       = vector_length_encoding(this);
    int       num_elems = Matcher::vector_length(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister start_bc = $vtmp$$XMMRegister;

    // 1. Broadcast the start value into the temporary vector.
    __ vpbroadcast(bt, start_bc, $src1$$Register, enc);
    // 2. Load the iota indices into the result register
    //    (presumably the byte sequence 0, 1, 2, ... - confirm against load_iota_indices).
    __ load_iota_indices(result, $scratch$$Register, num_elems);
    // 3. The loaded indices are treated as bytes; widen/convert them unless
    //    the element type already is T_BYTE.
    if (bt != T_BYTE) {
      __ vconvert_b2x(bt, result, result, enc);
    }
    // 4. result = indices + broadcast start value.
    __ vpadd(bt, result, result, start_bc, enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// PopulateIndex with a long start value (src1) and a constant stride of 1
// (src2 is restricted to immI_1 and re-checked by the assert below).
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp, rRegP scratch) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    BasicType bt        = Matcher::vector_element_basic_type(this);
    int       enc       = vector_length_encoding(this);
    int       num_elems = Matcher::vector_length(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister start_bc = $vtmp$$XMMRegister;

    // 1. Broadcast the start value into the temporary vector.
    __ vpbroadcast(bt, start_bc, $src1$$Register, enc);
    // 2. Load the iota indices into the result register
    //    (presumably the byte sequence 0, 1, 2, ... - confirm against load_iota_indices).
    __ load_iota_indices(result, $scratch$$Register, num_elems);
    // 3. The loaded indices are treated as bytes; widen/convert them unless
    //    the element type already is T_BYTE.
    if (bt != T_BYTE) {
      __ vconvert_b2x(bt, result, result, enc);
    }
    // 4. result = indices + broadcast start value.
    __ vpadd(bt, result, result, start_bc, enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
#endif
|
||||
//-------------------------------- Rearrange ----------------------------------
|
||||
|
||||
// LoadShuffle/Rearrange for Byte
|
||||
|
||||
115
test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java
Normal file
115
test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8286972
|
||||
* @summary Test vectorization of loop induction variable usage in the loop
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") |
|
||||
* (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*")
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestPopulateIndex
|
||||
*/
|
||||
|
||||
package compiler.vectorization;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
|
||||
public class TestPopulateIndex {
|
||||
private static final int count = 10000;
|
||||
|
||||
private int[] idx;
|
||||
private int[] src;
|
||||
private int[] dst;
|
||||
private float[] f;
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.run(TestPopulateIndex.class);
|
||||
}
|
||||
|
||||
public TestPopulateIndex() {
|
||||
idx = new int[count];
|
||||
src = new int[count];
|
||||
dst = new int[count];
|
||||
f = new float[count];
|
||||
Random ran = new Random(0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
src[i] = ran.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"PopulateIndex", ">= 1"})
|
||||
public void indexArrayFill() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
idx[i] = i;
|
||||
}
|
||||
checkResultIndexArrayFill();
|
||||
}
|
||||
|
||||
public void checkResultIndexArrayFill() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
int expected = i;
|
||||
if (idx[i] != expected) {
|
||||
throw new RuntimeException("Invalid result: idx[" + i + "] = " + idx[i] + " != " + expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"PopulateIndex", ">= 1"})
|
||||
public void exprWithIndex1() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
dst[i] = src[i] * (i & 7);
|
||||
}
|
||||
checkResultExprWithIndex1();
|
||||
}
|
||||
|
||||
public void checkResultExprWithIndex1() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
int expected = src[i] * (i & 7);
|
||||
if (dst[i] != expected) {
|
||||
throw new RuntimeException("Invalid result: dst[" + i + "] = " + dst[i] + " != " + expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"PopulateIndex", ">= 1"})
|
||||
public void exprWithIndex2() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
f[i] = i * i + 100;
|
||||
}
|
||||
checkResultExprWithIndex2();
|
||||
}
|
||||
|
||||
public void checkResultExprWithIndex2() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
float expected = i * i + 100;
|
||||
if (f[i] != expected) {
|
||||
throw new RuntimeException("Invalid result: f[" + i + "] = " + f[i] + " != " + expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user