mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-13 15:09:39 +00:00
8153998: Masked vector post loops
Use masked vectorization so that a post loop executes in a single iteration, replacing the scalar fix-up loops that previously needed many iterations to complete the remaining work of a user loop. Reviewed-by: twisti, kvn
This commit is contained in:
parent
332ffb710a
commit
64cf7e6b6f
@ -3346,6 +3346,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Query for predicated-vector support (reading based on the function name).
// This definition unconditionally answers false.
const bool Matcher::has_predicated_vectors() {
  return false;
}
|
||||
|
||||
// Returns the float pressure threshold to use. This definition applies no
// adjustment: the caller-supplied default is handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
|
||||
@ -2047,6 +2047,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Query for predicated-vector support (reading based on the function name).
// This definition unconditionally answers false.
const bool Matcher::has_predicated_vectors() {
  return false;
}
|
||||
|
||||
// Returns the float pressure threshold to use. This definition applies no
// adjustment: the caller-supplied default is handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
|
||||
@ -1904,6 +1904,10 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
// Query for predicated-vector support (reading based on the function name).
// This definition unconditionally answers false.
const bool Matcher::has_predicated_vectors() {
  return false;
}
|
||||
|
||||
// Returns the float pressure threshold to use. This definition applies no
// adjustment: the caller-supplied default is handed back unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
|
||||
|
||||
@ -1240,6 +1240,7 @@ void Assembler::addr_nop_8() {
|
||||
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1250,6 +1251,7 @@ void Assembler::addsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -1599,6 +1601,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2F);
|
||||
emit_operand(dst, src);
|
||||
@ -1607,6 +1610,7 @@ void Assembler::comisd(XMMRegister dst, Address src) {
|
||||
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2F);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1733,6 +1737,7 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5A);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -1743,6 +1748,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5A);
|
||||
emit_operand(dst, src);
|
||||
@ -1849,6 +1855,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -1857,6 +1864,7 @@ void Assembler::divsd(XMMRegister dst, Address src) {
|
||||
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2131,6 +2139,7 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x28);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2165,6 +2174,7 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x12);
|
||||
emit_int8(0xC0 | encode);
|
||||
@ -2202,6 +2212,15 @@ void Assembler::kmovwl(Register dst, KRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Memory-source form of kmovwl: emits a VEX prefix for the address operand,
// the opcode byte 0x90 from the 0F map, and then the operand bytes that pair
// the mask register `dst` with `src`.
// Asserts that EVEX is supported before emitting anything.
void Assembler::kmovwl(KRegister dst, Address src) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int dst_enc = dst->encoding();
  vex_prefix(src, 0, dst_enc, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8((unsigned char)0x90);
  // emit_operand is called with dst cast to Register, as in the original code.
  emit_operand((Register)dst, src);
}
|
||||
|
||||
void Assembler::kmovdl(KRegister dst, Register src) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -2260,6 +2279,14 @@ void Assembler::kmovql(Register dst, KRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Register-to-register knotwl: emits a VEX prefix for the two mask registers,
// the opcode byte 0x44 from the 0F map, and a final byte that ORs 0xC0 with
// the register bits returned by the prefix helper.
// Asserts that EVEX is supported before emitting anything.
void Assembler::knotwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8((unsigned char)0x44);
  emit_int8((unsigned char)(0xC0 | reg_bits));
}
|
||||
|
||||
// This instruction produces ZF or CF flags
|
||||
void Assembler::kortestbl(KRegister src1, KRegister src2) {
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
@ -2432,6 +2459,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
|
||||
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2444,6 +2472,7 @@ void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
emit_operand(dst, src);
|
||||
@ -2456,6 +2485,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2464,6 +2494,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
|
||||
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2475,6 +2506,7 @@ void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
@ -2487,6 +2519,7 @@ void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
|
||||
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
@ -2518,8 +2551,8 @@ void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
|
||||
assert(src != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2538,8 +2571,8 @@ void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6F);
|
||||
emit_operand(dst, src);
|
||||
@ -2550,8 +2583,8 @@ void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
|
||||
assert(src != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7F);
|
||||
emit_operand(src, dst);
|
||||
@ -2601,6 +2634,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x12);
|
||||
emit_operand(dst, src);
|
||||
@ -2631,6 +2665,7 @@ void Assembler::movq(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x7E);
|
||||
emit_operand(dst, src);
|
||||
@ -2641,6 +2676,7 @@ void Assembler::movq(Address dst, XMMRegister src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD6);
|
||||
emit_operand(src, dst);
|
||||
@ -2665,6 +2701,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
|
||||
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x10);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -2675,6 +2712,7 @@ void Assembler::movsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x10);
|
||||
emit_operand(dst, src);
|
||||
@ -2685,6 +2723,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x11);
|
||||
emit_operand(src, dst);
|
||||
@ -2808,6 +2847,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -2816,6 +2856,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
|
||||
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -3795,6 +3836,7 @@ void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x6C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4090,6 +4132,7 @@ void Assembler::smovl() {
|
||||
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4100,6 +4143,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_operand(dst, src);
|
||||
@ -4175,6 +4219,7 @@ void Assembler::subl(Register dst, Register src) {
|
||||
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4185,6 +4230,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4272,6 +4318,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2E);
|
||||
emit_operand(dst, src);
|
||||
@ -4280,6 +4327,7 @@ void Assembler::ucomisd(XMMRegister dst, Address src) {
|
||||
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x2E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4391,6 +4439,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -4399,6 +4448,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4427,6 +4477,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -4435,6 +4486,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4463,6 +4515,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4471,6 +4524,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4499,6 +4553,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4507,6 +4562,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
|
||||
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4537,6 +4593,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
|
||||
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4546,6 +4603,7 @@ void Assembler::addpd(XMMRegister dst, Address src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
@ -4564,6 +4622,7 @@ void Assembler::addps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4582,6 +4641,7 @@ void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x58);
|
||||
emit_operand(dst, src);
|
||||
@ -4600,6 +4660,7 @@ void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4616,6 +4677,7 @@ void Assembler::subps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4634,6 +4696,7 @@ void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5C);
|
||||
emit_operand(dst, src);
|
||||
@ -4652,6 +4715,7 @@ void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4662,6 +4726,7 @@ void Assembler::mulpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4678,6 +4743,7 @@ void Assembler::mulps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4696,6 +4762,7 @@ void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
@ -4714,6 +4781,7 @@ void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4730,6 +4798,7 @@ void Assembler::divps(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4748,6 +4817,7 @@ void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x5E);
|
||||
emit_operand(dst, src);
|
||||
@ -4766,6 +4836,7 @@ void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4776,6 +4847,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x51);
|
||||
emit_operand(dst, src);
|
||||
@ -4784,6 +4856,7 @@ void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4812,6 +4885,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_operand(dst, src);
|
||||
@ -4820,6 +4894,7 @@ void Assembler::andpd(XMMRegister dst, Address src) {
|
||||
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4838,6 +4913,7 @@ void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x54);
|
||||
emit_operand(dst, src);
|
||||
@ -4856,6 +4932,7 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x15);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4864,6 +4941,7 @@ void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x14);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4872,6 +4950,7 @@ void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4890,6 +4969,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) {
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_operand(dst, src);
|
||||
@ -4908,6 +4988,7 @@ void Assembler::xorps(XMMRegister dst, Address src) {
|
||||
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -4926,6 +5007,7 @@ void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x57);
|
||||
emit_operand(dst, src);
|
||||
@ -4996,6 +5078,7 @@ void Assembler::paddd(XMMRegister dst, Address src) {
|
||||
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD4);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5044,6 +5127,7 @@ void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD4);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5084,6 +5168,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD4);
|
||||
emit_operand(dst, src);
|
||||
@ -5115,6 +5200,7 @@ void Assembler::psubd(XMMRegister dst, XMMRegister src) {
|
||||
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5147,6 +5233,7 @@ void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5187,6 +5274,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xFB);
|
||||
emit_operand(dst, src);
|
||||
@ -5225,8 +5313,9 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
|
||||
}
|
||||
|
||||
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 2, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
assert(UseAVX > 2, "requires some form of EVEX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x40);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5253,10 +5342,11 @@ void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vecto
|
||||
}
|
||||
|
||||
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
assert(UseAVX > 2, "requires some form of EVEX");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_is_evex_instruction();
|
||||
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x40);
|
||||
emit_operand(dst, src);
|
||||
@ -5312,6 +5402,7 @@ void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
|
||||
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xF3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5341,6 +5432,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_l
|
||||
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM6 is for /6 encoding: 66 0F 73 /6 ib
|
||||
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5367,6 +5459,7 @@ void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xF3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5398,6 +5491,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
|
||||
// shifts 128 bit value in xmm register by number of bytes.
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
|
||||
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5424,6 +5518,7 @@ void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
|
||||
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5452,6 +5547,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_l
|
||||
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// XMM2 is for /2 encoding: 66 0F 73 /2 ib
|
||||
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x73);
|
||||
@ -5478,6 +5574,7 @@ void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xD3);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5587,6 +5684,7 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
||||
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xDF);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5876,9 +5974,9 @@ void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
}
|
||||
|
||||
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x39);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -5966,9 +6064,9 @@ void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
|
||||
}
|
||||
|
||||
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6093,7 +6191,8 @@ void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
|
||||
// duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x59);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6103,7 +6202,8 @@ void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
@ -6138,7 +6238,8 @@ void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
|
||||
// duplicate double precision data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x19);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
@ -6148,8 +6249,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
|
||||
attributes.set_rex_vex_w_reverted();
|
||||
// swap src<->dst for encoding
|
||||
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x19);
|
||||
@ -6163,12 +6265,9 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
||||
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7A);
|
||||
} else {
|
||||
emit_int8(0x78);
|
||||
}
|
||||
emit_int8(0x7A);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6176,12 +6275,9 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7B);
|
||||
} else {
|
||||
emit_int8(0x79);
|
||||
}
|
||||
emit_int8(0x7B);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6189,12 +6285,9 @@ void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7C);
|
||||
} else {
|
||||
emit_int8(0x58);
|
||||
}
|
||||
emit_int8(0x7C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6202,12 +6295,9 @@ void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
|
||||
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
if (attributes.is_evex_instruction()) {
|
||||
emit_int8(0x7C);
|
||||
} else {
|
||||
emit_int8(0x59);
|
||||
}
|
||||
emit_int8(0x7C);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
@ -6862,6 +6952,9 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
|
||||
attributes->set_is_evex_instruction();
|
||||
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
|
||||
} else {
|
||||
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
|
||||
attributes->set_rex_vex_w(false);
|
||||
}
|
||||
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
|
||||
}
|
||||
}
|
||||
@ -6921,6 +7014,9 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
|
||||
attributes->set_is_evex_instruction();
|
||||
evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
|
||||
} else {
|
||||
if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
|
||||
attributes->set_rex_vex_w(false);
|
||||
}
|
||||
vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
|
||||
}
|
||||
|
||||
@ -6975,6 +7071,21 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
|
||||
emit_int8((unsigned char)(0xF0 & src2_enc<<4));
|
||||
}
|
||||
|
||||
// Emits a register-register instruction with a 66 prefix in the 0F 38 opcode
// map, opcode byte 0xF7 and vex_w = false. Per the Intel SDM this byte pattern
// is the BMI2 SHLX encoding with W = 0 (32-bit operands).
//   dst  - destination register; its encoding is passed as dst_enc.
//   src1 - its encoding is passed as src_enc.
//   src2 - its encoding is passed as nds_enc; for SHLX that operand is the
//          shift count per the SDM -- TODO confirm against callers.
void Assembler::shlxl(Register dst, Register src1, Register src2) {
|
||||
// The instruction is only valid when the CPU reports BMI2 support.
assert(VM_Version::supports_bmi2(), "");
|
||||
// NOTE(review): legacy_mode = true presumably keeps this on the VEX (non-EVEX)
// path inside vex_prefix_and_encode -- confirm there.
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
// Note the operand order: src2 goes in the nds slot, src1 in the src slot.
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
// Opcode byte.
emit_int8((unsigned char)0xF7);
|
||||
// ModRM byte: 0xC0 sets the two top bits (register-direct form); the low bits
// come from the value returned by vex_prefix_and_encode.
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the 64-bit BMI2 SHLX instruction (VEX, 66 prefix, 0F 38 map,
// opcode 0xF7, W bit set) in register-register form.
//   dst  - destination register
//   src1 - register passed to the prefix emitter as src_enc
//   src2 - register passed to the prefix emitter as nds_enc
void Assembler::shlxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  const int dst_enc = dst->encoding();
  const int nds_enc = src2->encoding();  // second source goes in the nds slot
  const int src_enc = src1->encoding();  // first source goes in the src slot
  InstructionAttr attrs(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  const int modrm_bits = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8((unsigned char)0xF7);
  // 0xC0 sets the two top ModRM bits (register-direct operand form).
  emit_int8((unsigned char)(0xC0 | modrm_bits));
}
|
||||
|
||||
#ifndef _LP64
|
||||
|
||||
|
||||
@ -1337,6 +1337,7 @@ private:
|
||||
void kmovbl(KRegister dst, Register src);
|
||||
void kmovbl(Register dst, KRegister src);
|
||||
void kmovwl(KRegister dst, Register src);
|
||||
void kmovwl(KRegister dst, Address src);
|
||||
void kmovwl(Register dst, KRegister src);
|
||||
void kmovdl(KRegister dst, Register src);
|
||||
void kmovdl(Register dst, KRegister src);
|
||||
@ -1346,6 +1347,8 @@ private:
|
||||
void kmovql(KRegister dst, Register src);
|
||||
void kmovql(Register dst, KRegister src);
|
||||
|
||||
void knotwl(KRegister dst, KRegister src);
|
||||
|
||||
void kortestbl(KRegister dst, KRegister src);
|
||||
void kortestwl(KRegister dst, KRegister src);
|
||||
void kortestdl(KRegister dst, KRegister src);
|
||||
@ -2052,6 +2055,8 @@ private:
|
||||
void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
|
||||
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
|
||||
void shlxl(Register dst, Register src1, Register src2);
|
||||
void shlxq(Register dst, Register src1, Register src2);
|
||||
|
||||
protected:
|
||||
// Next instructions require address alignment 16 bytes SSE mode.
|
||||
@ -2077,6 +2082,7 @@ public:
|
||||
:
|
||||
_avx_vector_len(vector_len),
|
||||
_rex_vex_w(rex_vex_w),
|
||||
_rex_vex_w_reverted(false),
|
||||
_legacy_mode(legacy_mode),
|
||||
_no_reg_mask(no_reg_mask),
|
||||
_uses_vl(uses_vl),
|
||||
@ -2100,6 +2106,7 @@ public:
|
||||
private:
|
||||
int _avx_vector_len;
|
||||
bool _rex_vex_w;
|
||||
bool _rex_vex_w_reverted;
|
||||
bool _legacy_mode;
|
||||
bool _no_reg_mask;
|
||||
bool _uses_vl;
|
||||
@ -2116,6 +2123,7 @@ public:
|
||||
// query functions for field accessors
|
||||
int get_vector_len(void) const { return _avx_vector_len; }
|
||||
bool is_rex_vex_w(void) const { return _rex_vex_w; }
|
||||
bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
|
||||
bool is_legacy_mode(void) const { return _legacy_mode; }
|
||||
bool is_no_reg_mask(void) const { return _no_reg_mask; }
|
||||
bool uses_vl(void) const { return _uses_vl; }
|
||||
@ -2129,6 +2137,12 @@ public:
|
||||
// Set the vector len manually
|
||||
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
|
||||
|
||||
// Mark rex_vex_w as reverted: the prefix emitters clear rex_vex_w when the instruction is encoded with a VEX (AVX) prefix while UseAVX > 2
|
||||
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
|
||||
|
||||
// Set rex_vex_w based on state
|
||||
void set_rex_vex_w(bool state) { _rex_vex_w = state; }
|
||||
|
||||
// Set the instruction to be encoded in AVX mode
|
||||
void set_is_legacy_mode(void) { _legacy_mode = true; }
|
||||
|
||||
|
||||
@ -3399,6 +3399,18 @@ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
}
|
||||
|
||||
// Programs opmask register k1 from the count in `src`, then copies `src`
// into `dst`.
//   dst - used as scratch while the mask is built; holds a copy of `src`
//         on completion
//   src - count register; only read
// The mask value moved into k1 is (1 << src) - 1.
// NOTE(review): this assumes shlxl(dst, dst, src) shifts dst left by src,
// giving a mask with the low `src` bits set - confirm against the SHLX
// instruction reference.
void MacroAssembler::setvectmask(Register dst, Register src) {
|
||||
// dst = 1
Assembler::movl(dst, 1);
|
||||
// dst = dst << src
Assembler::shlxl(dst, dst, src);
|
||||
// dst = dst - 1
Assembler::decl(dst);
|
||||
// k1 = dst
Assembler::kmovdl(k1, dst);
|
||||
// Leave the original count in dst so the result register mirrors the input.
Assembler::movl(dst, src);
|
||||
}
|
||||
|
||||
// Resets opmask register k1 by emitting knotwl(k1, k0).
// NOTE(review): presumably k0 reads as zero here, so the word-sized NOT
// leaves k1 with its low 16 bits set - confirm the k0 convention.
void MacroAssembler::restorevectmask() {
|
||||
Assembler::knotwl(k1, k0);
|
||||
}
|
||||
|
||||
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
|
||||
if (reachable(src)) {
|
||||
if (UseXmmLoadAndClearUpper) {
|
||||
|
||||
@ -156,6 +156,10 @@ class MacroAssembler: public Assembler {
|
||||
void incrementq(Register reg, int value = 1);
|
||||
void incrementq(Address dst, int value = 1);
|
||||
|
||||
// special instructions for EVEX
|
||||
void setvectmask(Register dst, Register src);
|
||||
void restorevectmask();
|
||||
|
||||
// Support optimal SSE move instructions.
|
||||
void movflt(XMMRegister dst, XMMRegister src) {
|
||||
if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
|
||||
|
||||
@ -1758,6 +1758,15 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
||||
return ret_value; // Per default match rules are supported.
|
||||
}
|
||||
|
||||
const bool Matcher::has_predicated_vectors(void) {
|
||||
bool ret_value = false;
|
||||
if (UseAVX > 2) {
|
||||
ret_value = VM_Version::supports_avx512vl();
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
const int Matcher::float_pressure(int default_pressure_threshold) {
|
||||
int float_pressure_threshold = default_pressure_threshold;
|
||||
#ifdef _LP64
|
||||
@ -1875,7 +1884,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
|
||||
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
|
||||
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -1930,7 +1939,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
||||
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
|
||||
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -1950,7 +1959,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
||||
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
||||
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -2176,6 +2185,19 @@ instruct ShouldNotReachHere() %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// =================================EVEX special===============================
|
||||
|
||||
// Matches a SetVectMaskI node and expands it to the setvectmask macro
// sequence. Only available when the matcher reports predicated vectors.
instruct setMask(rRegI dst, rRegI src) %{
|
||||
predicate(Matcher::has_predicated_vectors());
|
||||
match(Set dst (SetVectMaskI src));
|
||||
// dst is declared TEMP; setvectmask writes dst before its final read of src.
effect(TEMP dst);
|
||||
format %{ "setvectmask $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ setvectmask($dst$$Register, $src$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
|
||||
instruct addF_reg(regF dst, regF src) %{
|
||||
@ -3069,11 +3091,11 @@ instruct loadV32(vecY dst, memory mem) %{
|
||||
%}
|
||||
|
||||
// Load vectors (64 bytes long)
|
||||
instruct loadV64(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64);
|
||||
instruct loadV64_dword(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
|
||||
match(Set dst (LoadVector mem));
|
||||
ins_cost(125);
|
||||
format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
int vector_len = 2;
|
||||
__ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
@ -3081,6 +3103,19 @@ instruct loadV64(vecZ dst, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Load vectors (64 bytes long)
|
||||
// Matches LoadVector nodes whose memory size is 64 bytes and whose element
// size is greater than 4, and loads them with evmovdquq.
instruct loadV64_qword(vecZ dst, memory mem) %{
|
||||
predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
|
||||
match(Set dst (LoadVector mem));
|
||||
ins_cost(125);
|
||||
format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
// NOTE(review): 2 presumably selects the 512-bit vector length - confirm.
int vector_len = 2;
|
||||
__ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Store vectors
|
||||
instruct storeV4(memory mem, vecS src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 4);
|
||||
@ -3126,11 +3161,11 @@ instruct storeV32(memory mem, vecY src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct storeV64(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64);
|
||||
instruct storeV64_dword(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
|
||||
match(Set mem (StoreVector mem src));
|
||||
ins_cost(145);
|
||||
format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
int vector_len = 2;
|
||||
__ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
|
||||
@ -3138,6 +3173,18 @@ instruct storeV64(memory mem, vecZ src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Matches StoreVector nodes whose memory size is 64 bytes and whose element
// size is greater than 4, and stores them with evmovdquq.
instruct storeV64_qword(memory mem, vecZ src) %{
|
||||
predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
|
||||
match(Set mem (StoreVector mem src));
|
||||
ins_cost(145);
|
||||
format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
|
||||
ins_encode %{
|
||||
// NOTE(review): 2 presumably selects the 512-bit vector length - confirm.
int vector_len = 2;
|
||||
__ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ====================LEGACY REPLICATE=======================================
|
||||
|
||||
instruct Repl4B_mem(vecS dst, memory mem) %{
|
||||
|
||||
@ -1021,10 +1021,10 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
|
||||
__ vmovdqu(xmm0, Address(rsp, -32));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, -64), 2);
|
||||
__ evmovdquq(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, -64), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -12047,6 +12047,7 @@ instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12062,6 +12063,7 @@ instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12076,6 +12078,7 @@ instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -12089,6 +12092,60 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// mask version
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
// Counted-loop-end branch used when the CountedLoopEnd node carries the
// vector-mask flag. Emits the conditional jump followed by restorevectmask();
// the restore sits after the branch, so it executes only when the branch is
// not taken.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "J$cop $labl\t# Loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
// Unsigned-compare form of the counted-loop-end branch used when the
// CountedLoopEnd node carries the vector-mask flag. Emits the conditional
// jump followed by restorevectmask(); the restore sits after the branch, so
// it executes only when the branch is not taken.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "J$cop,u $labl\t# Loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// cmpOpUCF / eFlagsRegUCF form of the counted-loop-end branch used when the
// CountedLoopEnd node carries the vector-mask flag. Emits the conditional
// jump followed by restorevectmask(); the restore sits after the branch, so
// it executes only when the branch is not taken.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
// Lower cost than the other two mask-restoring loop-end forms (300 vs 400).
ins_cost(300);
|
||||
format %{ "J$cop,u $labl\t# Loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - using unsigned comparison
|
||||
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
match(If cop cmp);
|
||||
|
||||
@ -1081,10 +1081,10 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
|
||||
__ vmovdqu(xmm0, Address(rsp, -32));
|
||||
break;
|
||||
case Op_VecZ:
|
||||
__ evmovdqul(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdqul(xmm0, Address(rsp, -64), 2);
|
||||
__ evmovdquq(Address(rsp, -64), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
|
||||
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
|
||||
__ evmovdquq(xmm0, Address(rsp, -64), 2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
@ -11443,6 +11443,7 @@ instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
|
||||
%{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11458,6 +11459,7 @@ instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11472,6 +11474,7 @@ instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
predicate(!n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
@ -11485,6 +11488,61 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// mask version
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
// Counted-loop-end branch used when the CountedLoopEnd node carries the
// vector-mask flag. Emits the conditional jump followed by restorevectmask();
// the restore sits after the branch, so it executes only when the branch is
// not taken.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
|
||||
%{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "j$cop $labl\t# loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
// Unsigned-compare form of the counted-loop-end branch used when the
// CountedLoopEnd node carries the vector-mask flag. Emits the conditional
// jump followed by restorevectmask(); the restore sits after the branch, so
// it executes only when the branch is not taken.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "j$cop,u $labl\t# loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// cmpOpUCF / rFlagsRegUCF form of the counted-loop-end branch used when the
// CountedLoopEnd node carries the vector-mask flag. Emits the conditional
// jump followed by restorevectmask(); the restore sits after the branch, so
// it executes only when the branch is not taken.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
|
||||
// Lower cost than the other two mask-restoring loop-end forms (300 vs 400).
ins_cost(300);
|
||||
format %{ "j$cop,u $labl\t# loop end\n\t"
|
||||
"restorevectmask \t# vector mask restore for loops" %}
|
||||
// Fixed size declared for the jump plus the mask restore.
// NOTE(review): re-check this value if either encoding changes.
size(10);
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - using unsigned comparison
|
||||
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
match(If cop cmp);
|
||||
|
||||
@ -178,6 +178,7 @@ macro(MachProj)
|
||||
macro(MaxI)
|
||||
macro(MemBarAcquire)
|
||||
macro(LoadFence)
|
||||
macro(SetVectMaskI)
|
||||
macro(MemBarAcquireLock)
|
||||
macro(MemBarCPUOrder)
|
||||
macro(MemBarRelease)
|
||||
|
||||
@ -374,10 +374,17 @@ bool CountedLoopReserveKit::create_reserve() {
|
||||
return false; // skip malformed counted loop
|
||||
}
|
||||
if (!cl->is_main_loop()) {
|
||||
if (TraceLoopOpts) {
|
||||
tty->print_cr("CountedLoopReserveKit::create_reserve: %d not main loop", cl->_idx);
|
||||
bool loop_not_canonical = true;
|
||||
if (cl->is_post_loop() && (cl->slp_max_unroll() > 0)) {
|
||||
loop_not_canonical = false;
|
||||
}
|
||||
// only reject some loop forms
|
||||
if (loop_not_canonical) {
|
||||
if (TraceLoopOpts) {
|
||||
tty->print_cr("CountedLoopReserveKit::create_reserve: %d not canonical loop", cl->_idx);
|
||||
}
|
||||
return false; // skip normal, pre, and post (conditionally) loops
|
||||
}
|
||||
return false; // skip normal, pre, and post loops
|
||||
}
|
||||
|
||||
_lp = _lpt->_head->as_Loop();
|
||||
|
||||
@ -2369,11 +2369,13 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts)
|
||||
if (multi_version_post_loops(lpt, lpt_next) == false) {
|
||||
// Cause the rce loop to be optimized away if we fail
|
||||
cl->mark_is_multiversioned();
|
||||
cl->set_slp_max_unroll(0);
|
||||
poison_rce_post_loop(lpt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sw.transform_loop(lpt, true);
|
||||
}
|
||||
} else if (cl->is_main_loop()) {
|
||||
sw.transform_loop(lpt, true);
|
||||
|
||||
@ -273,6 +273,9 @@ public:
|
||||
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
||||
static const bool match_rule_supported_vector(int opcode, int vlen);
|
||||
|
||||
// Some microarchitectures have mask registers used on vectors
|
||||
static const bool has_predicated_vectors(void);
|
||||
|
||||
// Some uarchs have different sized float register resources
|
||||
static const int float_pressure(int default_pressure_threshold);
|
||||
|
||||
|
||||
@ -722,8 +722,9 @@ public:
|
||||
Flag_avoid_back_to_back_after = Flag_avoid_back_to_back_before << 1,
|
||||
Flag_has_call = Flag_avoid_back_to_back_after << 1,
|
||||
Flag_is_reduction = Flag_has_call << 1,
|
||||
Flag_is_scheduled = Flag_is_reduction,
|
||||
Flag_is_expensive = Flag_is_scheduled << 1,
|
||||
Flag_is_scheduled = Flag_is_reduction << 1,
|
||||
Flag_has_vector_mask_set = Flag_is_scheduled << 1,
|
||||
Flag_is_expensive = Flag_has_vector_mask_set << 1,
|
||||
_max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
|
||||
};
|
||||
|
||||
@ -912,6 +913,9 @@ public:
|
||||
// It must have the loop's phi as input and provide a def to the phi.
|
||||
bool is_reduction() const { return (_flags & Flag_is_reduction) != 0; }
|
||||
|
||||
// The node is a CountedLoopEnd with a mask annotation so as to emit a restore context
|
||||
bool has_vector_mask_set() const { return (_flags & Flag_has_vector_mask_set) != 0; }
|
||||
|
||||
// Used in lcm to mark nodes that have scheduled
|
||||
bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }
|
||||
|
||||
|
||||
@ -52,6 +52,7 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
|
||||
_packset(arena(), 8, 0, NULL), // packs for the current block
|
||||
_bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
|
||||
_block(arena(), 8, 0, NULL), // nodes in current block
|
||||
_post_block(arena(), 8, 0, NULL), // nodes common to current block which are marked as post loop vectorizable
|
||||
_data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
|
||||
_mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
|
||||
_mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
|
||||
@ -100,10 +101,30 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
||||
|
||||
if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
|
||||
|
||||
if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
|
||||
bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
|
||||
if (post_loop_allowed) {
|
||||
if (cl->is_reduction_loop()) return; // no predication mapping
|
||||
Node *limit = cl->limit();
|
||||
if (limit->is_Con()) return; // non constant limits only
|
||||
// Now check the limit for expressions we do not handle
|
||||
if (limit->is_Add()) {
|
||||
Node *in2 = limit->in(2);
|
||||
if (in2->is_Con()) {
|
||||
int val = in2->get_int();
|
||||
// should not try to program these cases
|
||||
if (val < 0) return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// skip any loop that has not been assigned max unroll by analysis
|
||||
if (do_optimization) {
|
||||
if (cl->slp_max_unroll() == 0) return;
|
||||
}
|
||||
|
||||
// Check for no control flow in body (other than exit)
|
||||
Node *cl_exit = cl->loopexit();
|
||||
if (cl_exit->in(0) != lpt->_head) {
|
||||
if (cl->is_main_loop() && (cl_exit->in(0) != lpt->_head)) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
|
||||
@ -121,15 +142,16 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
||||
return;
|
||||
}
|
||||
|
||||
// We only re-enter slp when we vector mapped a queried loop and we want to
|
||||
// continue unrolling, in this case, slp is not subsequently done.
|
||||
if (cl->do_unroll_only()) return;
|
||||
// Skip any loops already optimized by slp
|
||||
if (cl->is_vectorized_loop()) return;
|
||||
|
||||
// Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
|
||||
CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
|
||||
if (pre_end == NULL) return;
|
||||
Node *pre_opaq1 = pre_end->limit();
|
||||
if (pre_opaq1->Opcode() != Op_Opaque1) return;
|
||||
if (cl->is_main_loop()) {
|
||||
// Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
|
||||
CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
|
||||
if (pre_end == NULL) return;
|
||||
Node *pre_opaq1 = pre_end->limit();
|
||||
if (pre_opaq1->Opcode() != Op_Opaque1) return;
|
||||
}
|
||||
|
||||
init(); // initialize data structures
|
||||
|
||||
@ -142,6 +164,19 @@ void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
||||
if (do_optimization) {
|
||||
assert(_packset.length() == 0, "packset must be empty");
|
||||
SLP_extract();
|
||||
if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) {
|
||||
if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
|
||||
IdealLoopTree *lpt_next = lpt->_next;
|
||||
CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop();
|
||||
_phase->has_range_checks(lpt_next);
|
||||
if (cl_next->is_post_loop() && !cl_next->range_checks_present()) {
|
||||
if (!cl_next->is_vectorized_loop()) {
|
||||
int slp_max_unroll_factor = cl->slp_max_unroll();
|
||||
cl_next->set_slp_max_unroll(slp_max_unroll_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -154,6 +189,9 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
Node_Stack nstack((int)ignored_size);
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
Node *cl_exit = cl->loopexit();
|
||||
int rpo_idx = _post_block.length();
|
||||
|
||||
assert(rpo_idx == 0, "post loop block is empty");
|
||||
|
||||
// First clear the entries
|
||||
for (uint i = 0; i < lpt()->_body.size(); i++) {
|
||||
@ -161,6 +199,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
}
|
||||
|
||||
int max_vector = Matcher::max_vector_size(T_INT);
|
||||
bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
|
||||
|
||||
// Process the loop, some/all of the stack entries will not be in order, ergo
|
||||
// need to preprocess the ignored initial state before we process the loop
|
||||
@ -259,6 +298,7 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
if (is_slp) {
|
||||
// Now we try to find the maximum supported consistent vector which the machine
|
||||
// description can use
|
||||
bool small_basic_type = false;
|
||||
for (uint i = 0; i < lpt()->_body.size(); i++) {
|
||||
if (ignored_loop_nodes[i] != -1) continue;
|
||||
|
||||
@ -269,6 +309,26 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
} else {
|
||||
bt = n->bottom_type()->basic_type();
|
||||
}
|
||||
|
||||
if (post_loop_allowed) {
|
||||
if (!small_basic_type) {
|
||||
switch (bt) {
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
small_basic_type = true;
|
||||
break;
|
||||
|
||||
case T_LONG:
|
||||
// TODO: Remove when support completed for mask context with LONG.
|
||||
// Support needs to be augmented for logical qword operations, currently we map to dword
|
||||
// buckets for vectors on logicals as these were legacy.
|
||||
small_basic_type = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_java_primitive(bt) == false) continue;
|
||||
|
||||
int cur_max_vector = Matcher::max_vector_size(bt);
|
||||
@ -288,6 +348,12 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
if (cur_max_vector < max_vector) {
|
||||
max_vector = cur_max_vector;
|
||||
}
|
||||
|
||||
// We only process post loops on predicated targets where we want to
|
||||
// mask map the loop to a single iteration
|
||||
if (post_loop_allowed) {
|
||||
_post_block.at_put_grow(rpo_idx++, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_slp) {
|
||||
@ -295,7 +361,14 @@ void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
|
||||
cl->mark_passed_slp();
|
||||
}
|
||||
cl->mark_was_slp();
|
||||
cl->set_slp_max_unroll(local_loop_unroll_factor);
|
||||
if (cl->is_main_loop()) {
|
||||
cl->set_slp_max_unroll(local_loop_unroll_factor);
|
||||
} else if (post_loop_allowed) {
|
||||
if (!small_basic_type) {
|
||||
// avoid replication context for small basic types in programmable masked loops
|
||||
cl->set_slp_max_unroll(local_loop_unroll_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,67 +423,104 @@ void SuperWord::SLP_extract() {
|
||||
if (!construct_bb()) {
|
||||
return; // Exit if no interesting nodes or complex graph.
|
||||
}
|
||||
|
||||
// build _dg, _disjoint_ptrs
|
||||
dependence_graph();
|
||||
|
||||
// compute function depth(Node*)
|
||||
compute_max_depth();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
if (mark_generations() != -1) {
|
||||
hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
|
||||
if (cl->is_main_loop()) {
|
||||
if (_do_vector_loop) {
|
||||
if (mark_generations() != -1) {
|
||||
hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly
|
||||
|
||||
if (!construct_bb()) {
|
||||
return; // Exit if no interesting nodes or complex graph.
|
||||
if (!construct_bb()) {
|
||||
return; // Exit if no interesting nodes or complex graph.
|
||||
}
|
||||
dependence_graph();
|
||||
compute_max_depth();
|
||||
}
|
||||
dependence_graph();
|
||||
compute_max_depth();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph");
|
||||
_lpt->dump_head();
|
||||
for (int j = 0; j < _block.length(); j++) {
|
||||
Node* n = _block.at(j);
|
||||
int d = depth(n);
|
||||
for (int i = 0; i < d; i++) tty->print("%s", " ");
|
||||
tty->print("%d :", d);
|
||||
n->dump();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
compute_vector_element_type();
|
||||
|
||||
// Attempt vectorization
|
||||
|
||||
find_adjacent_refs();
|
||||
|
||||
extend_packlist();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
if (_packset.length() == 0) {
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph");
|
||||
_lpt->dump_head();
|
||||
for (int j = 0; j < _block.length(); j++) {
|
||||
Node* n = _block.at(j);
|
||||
int d = depth(n);
|
||||
for (int i = 0; i < d; i++) tty->print("%s", " ");
|
||||
tty->print("%d :", d);
|
||||
n->dump();
|
||||
}
|
||||
}
|
||||
pack_parallel();
|
||||
#endif
|
||||
}
|
||||
|
||||
compute_vector_element_type();
|
||||
|
||||
// Attempt vectorization
|
||||
|
||||
find_adjacent_refs();
|
||||
|
||||
extend_packlist();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
if (_packset.length() == 0) {
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
|
||||
}
|
||||
pack_parallel();
|
||||
}
|
||||
}
|
||||
|
||||
combine_packs();
|
||||
|
||||
construct_my_pack_map();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
merge_packs_to_cmovd();
|
||||
}
|
||||
|
||||
filter_packs();
|
||||
|
||||
schedule();
|
||||
} else if (post_loop_allowed) {
|
||||
int saved_mapped_unroll_factor = cl->slp_max_unroll();
|
||||
if (saved_mapped_unroll_factor) {
|
||||
int vector_mapped_unroll_factor = saved_mapped_unroll_factor;
|
||||
|
||||
// now reset the slp_unroll_factor so that we can check the analysis mapped
|
||||
// what the vector loop was mapped to
|
||||
cl->set_slp_max_unroll(0);
|
||||
|
||||
// do the analysis on the post loop
|
||||
unrolling_analysis(vector_mapped_unroll_factor);
|
||||
|
||||
// if our analyzed loop is a canonical fit, start processing it
|
||||
if (vector_mapped_unroll_factor == saved_mapped_unroll_factor) {
|
||||
// now add the vector nodes to packsets
|
||||
for (int i = 0; i < _post_block.length(); i++) {
|
||||
Node* n = _post_block.at(i);
|
||||
Node_List* singleton = new Node_List();
|
||||
singleton->push(n);
|
||||
_packset.append(singleton);
|
||||
set_my_pack(n, singleton);
|
||||
}
|
||||
|
||||
// map base types for vector usage
|
||||
compute_vector_element_type();
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// for some reason we could not map the slp analysis state of the vectorized loop
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
combine_packs();
|
||||
|
||||
construct_my_pack_map();
|
||||
|
||||
if (_do_vector_loop) {
|
||||
merge_packs_to_cmovd();
|
||||
}
|
||||
|
||||
filter_packs();
|
||||
|
||||
schedule();
|
||||
|
||||
output();
|
||||
}
|
||||
|
||||
@ -811,6 +921,7 @@ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
|
||||
// Add dependence edges to load/store nodes for memory dependence
|
||||
// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
|
||||
void SuperWord::dependence_graph() {
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
// First, assign a dependence node to each memory node
|
||||
for (int i = 0; i < _block.length(); i++ ) {
|
||||
Node *n = _block.at(i);
|
||||
@ -825,7 +936,9 @@ void SuperWord::dependence_graph() {
|
||||
Node* n_tail = _mem_slice_tail.at(i);
|
||||
|
||||
// Get slice in predecessor order (last is first)
|
||||
mem_slice_preds(n_tail, n, _nlist);
|
||||
if (cl->is_main_loop()) {
|
||||
mem_slice_preds(n_tail, n, _nlist);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if(TraceSuperWord && Verbose) {
|
||||
@ -2029,20 +2142,23 @@ void SuperWord::output() {
|
||||
}
|
||||
#endif
|
||||
|
||||
// MUST ENSURE main loop's initial value is properly aligned:
|
||||
// (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
if (cl->is_main_loop()) {
|
||||
// MUST ENSURE main loop's initial value is properly aligned:
|
||||
// (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
|
||||
|
||||
align_initial_loop_index(align_to_ref());
|
||||
align_initial_loop_index(align_to_ref());
|
||||
|
||||
// Insert extract (unpack) operations for scalar uses
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
insert_extracts(_packset.at(i));
|
||||
// Insert extract (unpack) operations for scalar uses
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
insert_extracts(_packset.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
Compile* C = _phase->C;
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
uint max_vlen_in_bytes = 0;
|
||||
uint max_vlen = 0;
|
||||
bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
|
||||
|
||||
NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})
|
||||
|
||||
@ -2064,6 +2180,10 @@ void SuperWord::output() {
|
||||
Node* vn = NULL;
|
||||
Node* low_adr = p->at(0);
|
||||
Node* first = executed_first(p);
|
||||
if (can_process_post_loop) {
|
||||
// override vlen with the main loop's vector length
|
||||
vlen = cl->slp_max_unroll();
|
||||
}
|
||||
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})
|
||||
int opc = n->Opcode();
|
||||
if (n->is_Load()) {
|
||||
@ -2153,6 +2273,10 @@ void SuperWord::output() {
|
||||
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else if (is_cmov_pack(p)) {
|
||||
if (can_process_post_loop) {
|
||||
// do not refactor control flow in the post loop context
|
||||
return;
|
||||
}
|
||||
if (!n->is_CMove()) {
|
||||
continue;
|
||||
}
|
||||
@ -2217,6 +2341,7 @@ void SuperWord::output() {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
_block.at_put(i, vn);
|
||||
_igvn.register_new_node_with_optimizer(vn);
|
||||
_phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
|
||||
for (uint j = 0; j < p->size(); j++) {
|
||||
@ -2225,6 +2350,14 @@ void SuperWord::output() {
|
||||
}
|
||||
_igvn._worklist.push(vn);
|
||||
|
||||
if (can_process_post_loop) {
|
||||
// first check if the vector size is the maximum vector size which we can use on the machine,
|
||||
// other vector sizes have reduced values for predicated data mapping.
|
||||
if (vlen_in_bytes != (uint)MaxVectorSize) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (vlen_in_bytes > max_vlen_in_bytes) {
|
||||
max_vlen = vlen;
|
||||
max_vlen_in_bytes = vlen_in_bytes;
|
||||
@ -2247,15 +2380,38 @@ void SuperWord::output() {
|
||||
if (TraceSuperWordLoopUnrollAnalysis) {
|
||||
tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
|
||||
}
|
||||
// For atomic unrolled loops which are vector mapped, instigate more unrolling.
|
||||
|
||||
// For atomic unrolled loops which are vector mapped, instigate more unrolling
|
||||
cl->set_notpassed_slp();
|
||||
// if vector resources are limited, do not allow additional unrolling
|
||||
if (FLOATPRESSURE > 8) {
|
||||
C->set_major_progress();
|
||||
if (cl->is_main_loop()) {
|
||||
// if vector resources are limited, do not allow additional unrolling, also
|
||||
// do not unroll more on pure vector loops which were not reduced so that we can
|
||||
// program the post loop to single iteration execution.
|
||||
if (FLOATPRESSURE > 8) {
|
||||
C->set_major_progress();
|
||||
cl->mark_do_unroll_only();
|
||||
}
|
||||
}
|
||||
cl->mark_do_unroll_only();
|
||||
|
||||
if (do_reserve_copy()) {
|
||||
cl->mark_loop_vectorized();
|
||||
if (can_process_post_loop) {
|
||||
// Now create the difference of trip and limit and use it as our mask index.
|
||||
// Note: We limited the unroll of the vectorized loop so that
|
||||
// only vlen-1 size iterations can remain to be mask programmed.
|
||||
Node *incr = cl->incr();
|
||||
SubINode *index = new SubINode(cl->limit(), cl->init_trip());
|
||||
_igvn.register_new_node_with_optimizer(index);
|
||||
SetVectMaskINode *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
|
||||
_igvn.register_new_node_with_optimizer(mask);
|
||||
// make this a single iteration loop
|
||||
AddINode *new_incr = new AddINode(incr->in(1), mask);
|
||||
_igvn.register_new_node_with_optimizer(new_incr);
|
||||
_phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
|
||||
_igvn.replace_node(incr, new_incr);
|
||||
cl->mark_is_multiversioned();
|
||||
cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2274,6 +2430,12 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
|
||||
Node* p0 = p->at(0);
|
||||
uint vlen = p->size();
|
||||
Node* opd = p0->in(opd_idx);
|
||||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
|
||||
|
||||
if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) {
|
||||
// override vlen with the main loop's vector length
|
||||
vlen = cl->slp_max_unroll();
|
||||
}
|
||||
|
||||
if (same_inputs(p, opd_idx)) {
|
||||
if (opd->is_Vector() || opd->is_LoadVector()) {
|
||||
@ -3090,13 +3252,13 @@ CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode* cl) {
|
||||
return pre_end;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------init---------------------------
|
||||
void SuperWord::init() {
|
||||
_dg.init();
|
||||
_packset.clear();
|
||||
_disjoint_ptrs.clear();
|
||||
_block.clear();
|
||||
_post_block.clear();
|
||||
_data_entry.clear();
|
||||
_mem_slice_head.clear();
|
||||
_mem_slice_tail.clear();
|
||||
@ -3120,6 +3282,7 @@ void SuperWord::restart() {
|
||||
_packset.clear();
|
||||
_disjoint_ptrs.clear();
|
||||
_block.clear();
|
||||
_post_block.clear();
|
||||
_data_entry.clear();
|
||||
_mem_slice_head.clear();
|
||||
_mem_slice_tail.clear();
|
||||
|
||||
@ -261,6 +261,7 @@ class SuperWord : public ResourceObj {
|
||||
GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
|
||||
|
||||
GrowableArray<Node*> _block; // Nodes in current block
|
||||
GrowableArray<Node*> _post_block; // Nodes in post loop block
|
||||
GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
|
||||
GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
|
||||
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
|
||||
|
||||
@ -529,6 +529,7 @@ class LoadVectorNode : public LoadNode {
|
||||
Node* adr, const TypePtr* atyp,
|
||||
uint vlen, BasicType bt,
|
||||
ControlDependency control_dependency = LoadNode::DependsOnlyOnTest);
|
||||
uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
|
||||
};
|
||||
|
||||
//------------------------------StoreVectorNode--------------------------------
|
||||
@ -553,6 +554,8 @@ class StoreVectorNode : public StoreNode {
|
||||
static StoreVectorNode* make(int opc, Node* ctl, Node* mem,
|
||||
Node* adr, const TypePtr* atyp, Node* val,
|
||||
uint vlen);
|
||||
|
||||
uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
|
||||
};
|
||||
|
||||
|
||||
@ -791,4 +794,15 @@ class ExtractDNode : public ExtractNode {
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
};
|
||||
|
||||
//------------------------------SetVectMaskINode-------------------------------
|
||||
// Provide a mask for a vector predicate machine.
// The node has a control input and a single data input and is typed as a
// plain int. In this change SuperWord::output() builds it with the control of
// the loop's init_trip and (limit - init_trip) as the data input, then adds
// it into the post loop's increment.
|
||||
class SetVectMaskINode : public Node {
|
||||
public:
|
||||
SetVectMaskINode(Node *c, Node *in1) : Node(c, in1) {} // c: control input, in1: data input the mask is derived from
|
||||
virtual int Opcode() const; // defined outside this class body
|
||||
const Type *bottom_type() const { return TypeInt::INT; } // result is an int
|
||||
virtual uint ideal_reg() const { return Op_RegI; } // integer register class
|
||||
virtual const Type *Value(PhaseGVN *phase) const { return TypeInt::INT; } // always the full int type; never narrowed from the input
|
||||
};
|
||||
|
||||
#endif // SHARE_VM_OPTO_VECTORNODE_HPP
|
||||
|
||||
@ -1923,6 +1923,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
declare_c2_type(ConvL2INode, Node) \
|
||||
declare_c2_type(CastX2PNode, Node) \
|
||||
declare_c2_type(CastP2XNode, Node) \
|
||||
declare_c2_type(SetVectMaskINode, Node) \
|
||||
declare_c2_type(MemBarNode, MultiNode) \
|
||||
declare_c2_type(MemBarAcquireNode, MemBarNode) \
|
||||
declare_c2_type(MemBarReleaseNode, MemBarNode) \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user