8286972: Support the new loop induction variable related PopulateIndex IR node on x86

Reviewed-by: kvn, jbhateja
This commit is contained in:
Sandhya Viswanathan 2022-05-23 15:28:32 +00:00
parent 8122466fbb
commit 5d8d6da36a
4 changed files with 249 additions and 22 deletions

View File

@ -2274,6 +2274,84 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
}
}
// Emits a three-operand packed add (dst = src1 + src2) whose instruction is
// selected by elem_bt: integer adds for T_BYTE/T_SHORT/T_INT/T_LONG and
// floating-point adds for T_FLOAT/T_DOUBLE. vlen_enc is passed through to the
// chosen instruction unchanged. Requires UseAVX >= 2.
void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc) {
  assert(UseAVX >= 2, "required");
#ifdef ASSERT
  // Debug-only check: when AVX512BW is not available, byte/short adds must not
  // be requested at 512-bit length and all operands must be registers 0-15.
  const bool subword_elem = (elem_bt == T_BYTE) || (elem_bt == T_SHORT);
  const bool has_avx512bw = VM_Version::supports_avx512bw();
  if (subword_elem && !has_avx512bw) {
    assert(vlen_enc != Assembler::AVX_512bit, "required");
    assert((dst->encoding() < 16) && (src1->encoding() < 16) && (src2->encoding() < 16),
           "XMM register should be 0-15");
  }
#endif // ASSERT
  switch (elem_bt) {
    case T_BYTE:
      vpaddb(dst, src1, src2, vlen_enc);
      break;
    case T_SHORT:
      vpaddw(dst, src1, src2, vlen_enc);
      break;
    case T_INT:
      vpaddd(dst, src1, src2, vlen_enc);
      break;
    case T_FLOAT:
      vaddps(dst, src1, src2, vlen_enc);
      break;
    case T_LONG:
      vpaddq(dst, src1, src2, vlen_enc);
      break;
    case T_DOUBLE:
      vaddpd(dst, src1, src2, vlen_enc);
      break;
    default:
      // Any other element type is a caller error; report its name in debug builds.
      assert(false, "%s", type2name(elem_bt));
      break;
  }
}
#ifdef _LP64
// Broadcasts the general-purpose register src into the elem_bt-sized lanes of
// dst. Two code shapes are emitted:
//  - a single evpbroadcast{b,w,d,q} taking src directly, when UseAVX > 2 and
//    the required AVX512BW (byte/short) and AVX512VL (non-512-bit length)
//    features are reported as supported;
//  - otherwise a move of src into dst (movdl/movdq) followed by a
//    register-to-register broadcast of dst.
// Requires UseAVX >= 2.
void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc) {
  assert(UseAVX >= 2, "required");
  const bool subword_elem = (elem_bt == T_BYTE) || (elem_bt == T_SHORT);
  const bool below_512bit = (vlen_enc != Assembler::AVX_512bit);
  const bool direct_form_ok = (UseAVX > 2) &&
                              (!subword_elem || VM_Version::supports_avx512bw()) &&
                              (!below_512bit || VM_Version::supports_avx512vl());

  if (!direct_form_ok) {
    // Two-step fallback: only valid below 512 bits and for registers 0-15.
    assert(vlen_enc != Assembler::AVX_512bit, "required");
    assert((dst->encoding() < 16),"XMM register should be 0-15");
    switch (elem_bt) {
      case T_BYTE:
        movdl(dst, src);
        vpbroadcastb(dst, dst, vlen_enc);
        break;
      case T_SHORT:
        movdl(dst, src);
        vpbroadcastw(dst, dst, vlen_enc);
        break;
      case T_INT:
        movdl(dst, src);
        vpbroadcastd(dst, dst, vlen_enc);
        break;
      case T_FLOAT:
        movdl(dst, src);
        vbroadcastss(dst, dst, vlen_enc);
        break;
      case T_LONG:
        movdq(dst, src);
        vpbroadcastq(dst, dst, vlen_enc);
        break;
      case T_DOUBLE:
        movdq(dst, src);
        vbroadcastsd(dst, dst, vlen_enc);
        break;
      default:
        assert(false, "%s", type2name(elem_bt));
        break;
    }
    return;
  }

  // Direct form: 32-bit and 64-bit lanes share an instruction regardless of
  // whether the element type is integral or floating point.
  switch (elem_bt) {
    case T_BYTE:
      evpbroadcastb(dst, src, vlen_enc);
      break;
    case T_SHORT:
      evpbroadcastw(dst, src, vlen_enc);
      break;
    case T_INT:
    case T_FLOAT:
      evpbroadcastd(dst, src, vlen_enc);
      break;
    case T_LONG:
    case T_DOUBLE:
      evpbroadcastq(dst, src, vlen_enc);
      break;
    default:
      assert(false, "%s", type2name(elem_bt));
      break;
  }
}
#endif
// Converts the byte elements of src to to_elem_bt and writes the result to dst.
// Integral targets use a single vpmovsxb{w,d,q}; floating-point targets first
// widen bytes to ints with vpmovsxbd and then convert with vcvtdq2ps/vcvtdq2pd.
// Unsupported target types trip an assert in debug builds.
void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc) {
  switch (to_elem_bt) {
    case T_SHORT:
      vpmovsxbw(dst, src, vlen_enc);
      return;
    case T_INT:
      vpmovsxbd(dst, src, vlen_enc);
      return;
    case T_LONG:
      vpmovsxbq(dst, src, vlen_enc);
      return;
    case T_FLOAT:
      vpmovsxbd(dst, src, vlen_enc);
      vcvtdq2ps(dst, dst, vlen_enc);
      return;
    case T_DOUBLE: {
      // The intermediate int vector is produced at 256 bits when the final
      // vector is 512 bits, and at 128 bits in every other case.
      int int_vlen_enc = Assembler::AVX_128bit;
      if (vlen_enc == Assembler::AVX_512bit) {
        int_vlen_enc = Assembler::AVX_256bit;
      }
      vpmovsxbd(dst, src, int_vlen_enc);
      vcvtdq2pd(dst, dst, vlen_enc);
      return;
    }
    default:
      assert(false, "%s", type2name(to_elem_bt));
  }
}
//-------------------------------------------------------------------------------------------
// IndexOf for constant substrings with size >= 8 chars

View File

@ -132,6 +132,13 @@ public:
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
// Convert B2X: converts the byte elements of src to to_elem_bt
// (T_SHORT, T_INT, T_LONG, T_FLOAT or T_DOUBLE) and stores them in dst.
void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc);
#ifdef _LP64
// Broadcasts general-purpose register src into the elem_bt lanes of dst.
// Only declared when _LP64 is defined.
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
#endif
// Packed add dst = src1 + src2; the add instruction is selected by elem_bt.
void vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);

View File

@ -1468,6 +1468,11 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_PopulateIndex:
if (!is_LP64 || (UseAVX < 2)) {
return false;
}
break;
case Op_RoundVF:
if (UseAVX < 2) { // enabled for AVX2 only
return false;
@ -1811,6 +1816,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false; // Implementation limitation
}
break;
case Op_PopulateIndex:
if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
return false;
}
case Op_VectorCastB2X:
case Op_VectorCastS2X:
case Op_VectorCastI2X:
@ -6918,28 +6927,7 @@ instruct vcastBtoX(vec dst, vec src) %{
BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
int vlen_enc = vector_length_encoding(this);
switch (to_elem_bt) {
case T_SHORT:
__ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
break;
case T_INT:
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
break;
case T_FLOAT:
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
__ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
break;
case T_LONG:
__ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
break;
case T_DOUBLE: {
int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
__ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
__ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
break;
}
default: assert(false, "%s", type2name(to_elem_bt));
}
__ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -8272,6 +8260,45 @@ instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
ins_pipe( pipe_slow );
%}
#ifdef _LP64
// Matches (PopulateIndex src1 src2) where src1 is an int register and src2 is
// the constant 1 (operand class immI_1, re-checked by the assert below).
// dst, vtmp and scratch are all declared TEMP.
// Emitted sequence: broadcast src1 into vtmp, load the iota index table into
// dst, convert dst from bytes to the vector's element type when that type is
// not T_BYTE, then add vtmp to dst.
// NOTE(review): load_iota_indices is not visible here; it presumably yields the
// byte sequence 0, 1, 2, ... so that lane k ends up as src1 + k -- confirm.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp, rRegP scratch) %{
match(Set dst (PopulateIndex src1 src2));
effect(TEMP dst, TEMP vtmp, TEMP scratch);
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %}
ins_encode %{
// Only a stride of 1 is handled by this rule.
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
// vtmp = start value replicated across all lanes.
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
// dst = iota indices; scratch is handed to the loader as a temporary register.
__ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen);
// The loaded indices are treated as bytes, so widen/convert them unless the
// element type is already byte.
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
// dst = indices + broadcast start value.
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Long-start variant of the PopulateIndex rule: identical to the rRegI form
// except that src1 is a long register (rRegL). src2 must be the constant 1
// (operand class immI_1, re-checked by the assert below). dst, vtmp and
// scratch are all declared TEMP.
// Emitted sequence: broadcast src1 into vtmp, load the iota index table into
// dst, convert dst from bytes to the vector's element type when that type is
// not T_BYTE, then add vtmp to dst.
// NOTE(review): load_iota_indices is not visible here; it presumably yields the
// byte sequence 0, 1, 2, ... so that lane k ends up as src1 + k -- confirm.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp, rRegP scratch) %{
match(Set dst (PopulateIndex src1 src2));
effect(TEMP dst, TEMP vtmp, TEMP scratch);
format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp and $scratch as TEMP" %}
ins_encode %{
// Only a stride of 1 is handled by this rule.
assert($src2$$constant == 1, "required");
int vlen = Matcher::vector_length(this);
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = Matcher::vector_element_basic_type(this);
// vtmp = start value replicated across all lanes.
__ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
// dst = iota indices; scratch is handed to the loader as a temporary register.
__ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen);
// The loaded indices are treated as bytes, so widen/convert them unless the
// element type is already byte.
if (elem_bt != T_BYTE) {
__ vconvert_b2x(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
// dst = indices + broadcast start value.
__ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
#endif
//-------------------------------- Rearrange ----------------------------------
// LoadShuffle/Rearrange for Byte

View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8286972
* @summary Test vectorization of loop induction variable usage in the loop
* @requires vm.compiler2.enabled
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") |
* (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*")
* @library /test/lib /
* @run driver compiler.vectorization.TestPopulateIndex
*/
package compiler.vectorization;
import compiler.lib.ir_framework.*;
import java.util.Random;
/**
 * IR-framework test with three loops that use the loop induction variable as
 * data. Each {@code @Test} method carries an {@code @IR} rule asking for a
 * "PopulateIndex" count of at least one, and then calls a checker that
 * recomputes every element and throws on the first mismatch.
 */
public class TestPopulateIndex {
// Length of every array used by the tests.
private static final int count = 10000;
// Destination of indexArrayFill().
private int[] idx;
// Pseudo-random input for exprWithIndex1().
private int[] src;
// Destination of exprWithIndex1().
private int[] dst;
// Destination of exprWithIndex2().
private float[] f;
/** Entry point: hands this class to the IR test framework. */
public static void main(String args[]) {
TestFramework.run(TestPopulateIndex.class);
}
/**
 * Allocates the four arrays and fills {@code src} from a {@link Random}
 * seeded with 0, so the input values are the same on every run.
 */
public TestPopulateIndex() {
idx = new int[count];
src = new int[count];
dst = new int[count];
f = new float[count];
Random ran = new Random(0);
for (int i = 0; i < count; i++) {
src[i] = ran.nextInt();
}
}
/** Stores the loop index itself into each element of {@code idx}, then verifies. */
@Test
@IR(counts = {"PopulateIndex", ">= 1"})
public void indexArrayFill() {
for (int i = 0; i < count; i++) {
idx[i] = i;
}
checkResultIndexArrayFill();
}
/**
 * Checks that {@code idx[i] == i} for every element.
 *
 * @throws RuntimeException on the first element that differs
 */
public void checkResultIndexArrayFill() {
for (int i = 0; i < count; i++) {
int expected = i;
if (idx[i] != expected) {
throw new RuntimeException("Invalid result: idx[" + i + "] = " + idx[i] + " != " + expected);
}
}
}
/** Multiplies each {@code src} element by the low three bits of its index, then verifies. */
@Test
@IR(counts = {"PopulateIndex", ">= 1"})
public void exprWithIndex1() {
for (int i = 0; i < count; i++) {
dst[i] = src[i] * (i & 7);
}
checkResultExprWithIndex1();
}
/**
 * Checks that {@code dst[i] == src[i] * (i & 7)} for every element.
 *
 * @throws RuntimeException on the first element that differs
 */
public void checkResultExprWithIndex1() {
for (int i = 0; i < count; i++) {
int expected = src[i] * (i & 7);
if (dst[i] != expected) {
throw new RuntimeException("Invalid result: dst[" + i + "] = " + dst[i] + " != " + expected);
}
}
}
/**
 * Stores {@code i * i + 100} into a float array, then verifies. The
 * expression is evaluated in int arithmetic and converted to float on store.
 */
@Test
@IR(counts = {"PopulateIndex", ">= 1"})
public void exprWithIndex2() {
for (int i = 0; i < count; i++) {
f[i] = i * i + 100;
}
checkResultExprWithIndex2();
}
/**
 * Checks that {@code f[i]} equals {@code i * i + 100} converted to float for
 * every element.
 *
 * @throws RuntimeException on the first element that differs
 */
public void checkResultExprWithIndex2() {
for (int i = 0; i < count; i++) {
float expected = i * i + 100;
if (f[i] != expected) {
throw new RuntimeException("Invalid result: f[" + i + "] = " + f[i] + " != " + expected);
}
}
}
}