mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-07 22:50:49 +00:00
8277426: Optimize mask reduction operations on x86
Reviewed-by: sviswanathan, jiefu
This commit is contained in:
parent
3a4a94e5a8
commit
560f9c9372
@ -4306,6 +4306,20 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src, int vec_enc) {
|
||||
emit_int16((unsigned char)0xD7, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of VMOVMSKPS (VEX-encoded, opcode 0x50 in
// the 0F map with no SIMD prefix).
//
//   dst     - general purpose register that receives the result
//   src     - vector register to read
//   vec_enc - vector length encoding handed to the VEX prefix
//
// Per the Intel SDM the instruction collects the sign bit of every packed
// single-precision lane of src into the low bits of dst.
void Assembler::vmovmskps(Register dst, XMMRegister src, int vec_enc) {
  assert(VM_Version::supports_avx(), "");

  // VEX only: legacy_mode keeps the encoder from choosing an EVEX prefix.
  InstructionAttr attr(vec_enc,
                       /* rex_w */       false,
                       /* legacy_mode */ true,
                       /* no_mask_reg */ true,
                       /* uses_vl */     false);

  // No second source operand, hence the 0 in the nds slot.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_NONE, VEX_OPCODE_0F, &attr);

  // 0xC0 selects the register-direct ModRM form.
  emit_int16(0x50, (0xC0 | reg_bits));
}
|
||||
|
||||
// Emits the register-to-register form of VMOVMSKPD (VEX-encoded, opcode 0x50 in
// the 0F map with the 0x66 SIMD prefix).
//
//   dst     - general purpose register that receives the result
//   src     - vector register to read
//   vec_enc - vector length encoding handed to the VEX prefix
//
// Per the Intel SDM the instruction collects the sign bit of every packed
// double-precision lane of src into the low bits of dst.
void Assembler::vmovmskpd(Register dst, XMMRegister src, int vec_enc) {
  assert(VM_Version::supports_avx(), "");

  // VEX only: legacy_mode keeps the encoder from choosing an EVEX prefix.
  InstructionAttr attr(vec_enc,
                       /* rex_w */       true,
                       /* legacy_mode */ true,
                       /* no_mask_reg */ true,
                       /* uses_vl */     false);

  // No second source operand, hence the 0 in the nds slot.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F, &attr);

  // 0xC0 selects the register-direct ModRM form.
  emit_int16(0x50, (0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
|
||||
assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
|
||||
InstructionMark im(this);
|
||||
|
||||
@ -1774,6 +1774,8 @@ private:
|
||||
|
||||
void pmovmskb(Register dst, XMMRegister src);
|
||||
void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
|
||||
void vmovmskps(Register dst, XMMRegister src, int vec_enc);
|
||||
void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
|
||||
void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
// SSE 4.1 extract
|
||||
|
||||
@ -4060,61 +4060,123 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask,
|
||||
Register tmp, int masklen, int masksize,
|
||||
int vec_enc) {
|
||||
void C2_MacroAssembler::vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen) {
|
||||
switch(opc) {
|
||||
case Op_VectorMaskTrueCount:
|
||||
popcntq(dst, tmp);
|
||||
break;
|
||||
case Op_VectorMaskLastTrue:
|
||||
if (VM_Version::supports_lzcnt()) {
|
||||
lzcntq(tmp, tmp);
|
||||
movl(dst, 63);
|
||||
subl(dst, tmp);
|
||||
} else {
|
||||
movl(dst, -1);
|
||||
bsrq(tmp, tmp);
|
||||
cmov32(Assembler::notZero, dst, tmp);
|
||||
}
|
||||
break;
|
||||
case Op_VectorMaskFirstTrue:
|
||||
if (VM_Version::supports_bmi1()) {
|
||||
if (masklen < 32) {
|
||||
orl(tmp, 1 << masklen);
|
||||
tzcntl(dst, tmp);
|
||||
} else if (masklen == 32) {
|
||||
tzcntl(dst, tmp);
|
||||
} else {
|
||||
assert(masklen == 64, "");
|
||||
tzcntq(dst, tmp);
|
||||
}
|
||||
} else {
|
||||
if (masklen < 32) {
|
||||
orl(tmp, 1 << masklen);
|
||||
bsfl(dst, tmp);
|
||||
} else {
|
||||
assert(masklen == 32 || masklen == 64, "");
|
||||
movl(dst, masklen);
|
||||
if (masklen == 32) {
|
||||
bsfl(tmp, tmp);
|
||||
} else {
|
||||
bsfq(tmp, tmp);
|
||||
}
|
||||
cmov32(Assembler::notZero, dst, tmp);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Op_VectorMaskToLong:
|
||||
assert(dst == tmp, "Dst and tmp should be the same for toLong operations");
|
||||
break;
|
||||
default: assert(false, "Unhandled mask operation");
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp,
|
||||
int masklen, int masksize, int vec_enc) {
|
||||
assert(VM_Version::supports_popcnt(), "");
|
||||
|
||||
if(VM_Version::supports_avx512bw()) {
|
||||
kmovql(tmp, mask);
|
||||
} else {
|
||||
assert(masklen <= 16, "");
|
||||
kmovwl(tmp, mask);
|
||||
}
|
||||
if (masksize < 16) {
|
||||
andq(tmp, (((jlong)1 << masklen) - 1));
|
||||
}
|
||||
switch(opc) {
|
||||
case Op_VectorMaskTrueCount:
|
||||
popcntq(dst, tmp);
|
||||
break;
|
||||
case Op_VectorMaskLastTrue:
|
||||
mov64(dst, -1);
|
||||
bsrq(tmp, tmp);
|
||||
cmov(Assembler::notZero, dst, tmp);
|
||||
break;
|
||||
case Op_VectorMaskFirstTrue:
|
||||
mov64(dst, masklen);
|
||||
bsfq(tmp, tmp);
|
||||
cmov(Assembler::notZero, dst, tmp);
|
||||
break;
|
||||
default: assert(false, "Unhandled mask operation");
|
||||
|
||||
// Mask generated out of partial vector comparisons/replicate/mask manipulation
|
||||
// operations needs to be clipped.
|
||||
if (masksize < 16 && opc != Op_VectorMaskFirstTrue) {
|
||||
andq(tmp, (1 << masklen) - 1);
|
||||
}
|
||||
|
||||
vector_mask_operation_helper(opc, dst, tmp, masklen);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
|
||||
XMMRegister xtmp1, Register tmp, int masklen, int masksize,
|
||||
int vec_enc) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
vpxor(xtmp, xtmp, xtmp, vec_enc);
|
||||
vpsubb(xtmp, xtmp, mask, vec_enc);
|
||||
vpmovmskb(tmp, xtmp, vec_enc);
|
||||
if (masksize < 16) {
|
||||
andq(tmp, (((jlong)1 << masklen) - 1));
|
||||
Register tmp, int masklen, BasicType bt, int vec_enc) {
|
||||
assert(vec_enc == AVX_128bit && VM_Version::supports_avx() ||
|
||||
vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4), "");
|
||||
assert(VM_Version::supports_popcnt(), "");
|
||||
|
||||
bool need_clip = false;
|
||||
switch(bt) {
|
||||
case T_BOOLEAN:
|
||||
// While masks of other types contain 0, -1; boolean masks contain lane values of 0, 1
|
||||
vpxor(xtmp, xtmp, xtmp, vec_enc);
|
||||
vpsubb(xtmp, xtmp, mask, vec_enc);
|
||||
vpmovmskb(tmp, xtmp, vec_enc);
|
||||
need_clip = masklen < 16;
|
||||
break;
|
||||
case T_BYTE:
|
||||
vpmovmskb(tmp, mask, vec_enc);
|
||||
need_clip = masklen < 16;
|
||||
break;
|
||||
case T_SHORT:
|
||||
vpacksswb(xtmp, mask, mask, vec_enc);
|
||||
if (masklen >= 16) {
|
||||
vpermpd(xtmp, xtmp, 8, vec_enc);
|
||||
}
|
||||
vpmovmskb(tmp, xtmp, Assembler::AVX_128bit);
|
||||
need_clip = masklen < 16;
|
||||
break;
|
||||
case T_INT:
|
||||
case T_FLOAT:
|
||||
vmovmskps(tmp, mask, vec_enc);
|
||||
need_clip = masklen < 4;
|
||||
break;
|
||||
case T_LONG:
|
||||
case T_DOUBLE:
|
||||
vmovmskpd(tmp, mask, vec_enc);
|
||||
need_clip = masklen < 2;
|
||||
break;
|
||||
default: assert(false, "Unhandled type, %s", type2name(bt));
|
||||
}
|
||||
switch(opc) {
|
||||
case Op_VectorMaskTrueCount:
|
||||
popcntq(dst, tmp);
|
||||
break;
|
||||
case Op_VectorMaskLastTrue:
|
||||
mov64(dst, -1);
|
||||
bsrq(tmp, tmp);
|
||||
cmov(Assembler::notZero, dst, tmp);
|
||||
break;
|
||||
case Op_VectorMaskFirstTrue:
|
||||
mov64(dst, masklen);
|
||||
bsfq(tmp, tmp);
|
||||
cmov(Assembler::notZero, dst, tmp);
|
||||
break;
|
||||
default: assert(false, "Unhandled mask operation");
|
||||
|
||||
// Mask generated out of partial vector comparisons/replicate/mask manipulation
|
||||
// operations needs to be clipped.
|
||||
if (need_clip && opc != Op_VectorMaskFirstTrue) {
|
||||
// need_clip implies masklen < 32
|
||||
andq(tmp, (1 << masklen) - 1);
|
||||
}
|
||||
|
||||
vector_mask_operation_helper(opc, dst, tmp, masklen);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -224,10 +224,12 @@ public:
|
||||
|
||||
public:
|
||||
#ifdef _LP64
|
||||
void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen);
|
||||
|
||||
void vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc);
|
||||
|
||||
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, XMMRegister xtmp1,
|
||||
Register tmp, int masklen, int masksize, int vec_enc);
|
||||
void vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
|
||||
Register tmp, int masklen, BasicType bt, int vec_enc);
|
||||
#endif
|
||||
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
||||
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
||||
|
||||
@ -8647,43 +8647,45 @@ instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
|
||||
effect(TEMP dst, KILL cr);
|
||||
format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
|
||||
ins_encode %{
|
||||
int mask_len = Matcher::vector_length(this, $mask);
|
||||
int opcode = this->ideal_Opcode();
|
||||
BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
|
||||
if (VM_Version::supports_avx512vlbw()) {
|
||||
__ kmovql($dst$$Register, $mask$$KRegister);
|
||||
} else {
|
||||
assert(mask_len <= 16, "");
|
||||
__ kmovwl($dst$$Register, $mask$$KRegister);
|
||||
}
|
||||
// Mask generated out of partial vector comparisons/replicate/mask manipulation
|
||||
// operations needs to be clipped.
|
||||
int mask_len = Matcher::vector_length(this, $mask);
|
||||
int mask_size = mask_len * type2aelembytes(mbt);
|
||||
if (mask_size < 16) {
|
||||
__ andq($dst$$Register, (((jlong)1 << mask_len) - 1));
|
||||
}
|
||||
int vlen_enc = vector_length_encoding(this, $mask);
|
||||
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
|
||||
$dst$$Register, mask_len, mask_size, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vmask_tolong_avx(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
|
||||
predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL &&
|
||||
n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
|
||||
// VectorMaskToLong on a mask that lives in a vector register (the input is not
// a vectmask-typed node). dst doubles as the scratch GPR because the ToLong
// path requires the result and temporary registers to be the same.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    // Shape of the mask operand: lane count, lane type and vector length.
    const int       lane_count = Matcher::vector_length(this, $mask);
    const BasicType lane_type  = Matcher::vector_element_basic_type(this, $mask);
    const int       vec_enc    = vector_length_encoding(this, $mask);
    const int       ideal_opc  = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_count, lane_type, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// VectorMaskToLong folded with the VectorStoreMask that feeds it: the rule
// consumes the VectorStoreMask input directly (when that input is not a
// vectmask-typed node) and lets vector_mask_operation gather and clip the
// lane bits according to the lane type. dst doubles as the scratch GPR because
// the ToLong path requires the result and temporary registers to be the same.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    // Extraction and clipping both happen inside vector_mask_operation, so no
    // separate vpmovmskb/andq sequence is emitted here.
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -8699,25 +8701,40 @@ instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
|
||||
int mask_len = Matcher::vector_length(this, $mask);
|
||||
int mask_size = mask_len * type2aelembytes(mbt);
|
||||
int vlen_enc = vector_length_encoding(this, $mask);
|
||||
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register,
|
||||
mask_len, mask_size, vlen_enc);
|
||||
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
|
||||
$tmp$$Register, mask_len, mask_size, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
|
||||
// VectorMaskTrueCount on a mask that lives in a vector register (the input is
// not a vectmask-typed node). Uses one general purpose and one vector
// temporary; the lane type tells vector_mask_operation how to gather the bits.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// VectorMaskTrueCount folded with the VectorStoreMask that feeds it: the rule
// consumes the VectorStoreMask input directly when that input is not a
// vectmask-typed node.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape of the mask operand: lane count, lane type and vector length.
    const int       lane_count = Matcher::vector_length(this, $mask);
    const BasicType lane_type  = Matcher::vector_element_basic_type(this, $mask);
    const int       vec_enc    = vector_length_encoding(this, $mask);
    const int       ideal_opc  = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_type, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -8734,26 +8751,42 @@ instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsRe
|
||||
int mask_len = Matcher::vector_length(this, $mask);
|
||||
int mask_size = mask_len * type2aelembytes(mbt);
|
||||
int vlen_enc = vector_length_encoding(this, $mask);
|
||||
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, mask_len,
|
||||
mask_size, vlen_enc);
|
||||
__ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
|
||||
$tmp$$Register, mask_len, mask_size, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
|
||||
// VectorMaskFirstTrue / VectorMaskLastTrue on a mask that lives in a vector
// register (the input is not a vectmask-typed node). One rule serves both
// ideal nodes; the opcode passed down selects which scan is emitted.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// VectorMaskFirstTrue / VectorMaskLastTrue folded with the VectorStoreMask that
// feeds them: the rule consumes the VectorStoreMask input directly when that
// input is not a vectmask-typed node. One rule serves both ideal nodes; the
// opcode passed down selects which scan is emitted.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape of the mask operand: lane count, lane type and vector length.
    const int       lane_count = Matcher::vector_length(this, $mask);
    const BasicType lane_type  = Matcher::vector_element_basic_type(this, $mask);
    const int       vec_enc    = vector_length_encoding(this, $mask);
    const int       ideal_opc  = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_type, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user