mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-15 12:55:07 +00:00
8277617: Adjust AVX3Threshold for copy/fill stubs
Reviewed-by: jbhateja, dholmes, neliasso, jiefu
This commit is contained in:
parent
2b87c2b429
commit
24e16ac637
@ -5021,7 +5021,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
||||
// cnt - number of qwords (8-byte words).
|
||||
// base - start address, qword aligned.
|
||||
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
|
||||
bool use64byteVector = MaxVectorSize == 64 && AVX3Threshold == 0;
|
||||
bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
|
||||
if (use64byteVector) {
|
||||
vpxor(xtmp, xtmp, xtmp, AVX_512bit);
|
||||
} else if (MaxVectorSize >= 32) {
|
||||
@ -5085,7 +5085,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
||||
// Clearing constant sized memory using YMM/ZMM registers.
|
||||
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
|
||||
assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
|
||||
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
|
||||
bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
|
||||
|
||||
int vector64_count = (cnt & (~0x7)) >> 3;
|
||||
cnt = cnt & 0x7;
|
||||
@ -5328,8 +5328,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
|
||||
// Fill 64-byte chunks
|
||||
Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;
|
||||
|
||||
// If number of bytes to fill < AVX3Threshold, perform fill using AVX2
|
||||
cmpl(count, AVX3Threshold);
|
||||
// If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2
|
||||
cmpl(count, VM_Version::avx3_threshold());
|
||||
jccb(Assembler::below, L_check_fill_64_bytes_avx2);
|
||||
|
||||
vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
|
||||
@ -8717,6 +8717,7 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
|
||||
Label L_fill_zmm_sequence;
|
||||
|
||||
int shift = -1;
|
||||
int avx3threshold = VM_Version::avx3_threshold();
|
||||
switch(type) {
|
||||
case T_BYTE: shift = 0;
|
||||
break;
|
||||
@ -8732,10 +8733,10 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
|
||||
fatal("Unhandled type: %s\n", type2name(type));
|
||||
}
|
||||
|
||||
if (AVX3Threshold != 0 || MaxVectorSize == 32) {
|
||||
if ((avx3threshold != 0) || (MaxVectorSize == 32)) {
|
||||
|
||||
if (MaxVectorSize == 64) {
|
||||
cmpq(count, AVX3Threshold >> shift);
|
||||
cmpq(count, avx3threshold >> shift);
|
||||
jcc(Assembler::greater, L_fill_zmm_sequence);
|
||||
}
|
||||
|
||||
|
||||
@ -114,7 +114,7 @@ void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KReg
|
||||
bool use64byteVector, Label& L_entry, Label& L_exit) {
|
||||
Label L_entry_64, L_entry_96, L_entry_128;
|
||||
Label L_entry_160, L_entry_192;
|
||||
bool avx3 = MaxVectorSize > 32 && AVX3Threshold == 0;
|
||||
bool avx3 = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
|
||||
|
||||
int size_mat[][6] = {
|
||||
/* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
|
||||
|
||||
@ -1384,8 +1384,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
|
||||
int avx3threshold = VM_Version::avx3_threshold();
|
||||
bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
|
||||
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
|
||||
Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
|
||||
const Register from = rdi; // source array address
|
||||
@ -1448,7 +1448,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// PRE-MAIN-POST loop for aligned copy.
|
||||
__ BIND(L_entry);
|
||||
|
||||
if (AVX3Threshold != 0) {
|
||||
if (avx3threshold != 0) {
|
||||
__ cmpq(count, threshold[shift]);
|
||||
if (MaxVectorSize == 64) {
|
||||
// Copy using 64 byte vectors.
|
||||
@ -1460,7 +1460,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
if (MaxVectorSize < 64 || AVX3Threshold != 0) {
|
||||
if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
|
||||
// Partial copy to make dst address 32 byte aligned.
|
||||
__ movq(temp2, to);
|
||||
__ andq(temp2, 31);
|
||||
@ -1603,7 +1603,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
|
||||
int avx3threshold = VM_Version::avx3_threshold();
|
||||
bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
|
||||
|
||||
Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
|
||||
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
|
||||
@ -1668,12 +1669,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// PRE-MAIN-POST loop for aligned copy.
|
||||
__ BIND(L_entry);
|
||||
|
||||
if (MaxVectorSize > 32 && AVX3Threshold != 0) {
|
||||
if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
|
||||
__ cmpq(temp1, threshold[shift]);
|
||||
__ jcc(Assembler::greaterEqual, L_pre_main_post_64);
|
||||
}
|
||||
|
||||
if (MaxVectorSize < 64 || AVX3Threshold != 0) {
|
||||
if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
|
||||
// Partial copy to make dst address 32 byte aligned.
|
||||
__ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
|
||||
__ andq(temp2, 31);
|
||||
|
||||
@ -1878,6 +1878,17 @@ void VM_Version::check_virtualizations() {
|
||||
}
|
||||
}
|
||||
|
||||
// avx3_threshold() returns the threshold at which 64-byte instructions are
|
||||
// used for implementing the array copy and fill/clear operations.
|
||||
// Intel platforms that support the serialize instruction have an improved
|
||||
// implementation of 64-byte loads/stores, so on those platforms the threshold
|
||||
// is 0 unless AVX3Threshold was set explicitly; otherwise AVX3Threshold is used.
|
||||
int VM_Version::avx3_threshold() {
|
||||
return (is_intel_family_core() &&
|
||||
supports_serialize() &&
|
||||
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
|
||||
}
|
||||
|
||||
void VM_Version::initialize() {
|
||||
ResourceMark rm;
|
||||
// Making this stub must be FIRST use of assembler
|
||||
|
||||
@ -911,6 +911,8 @@ public:
|
||||
// True when is_intel_family_core() holds and extended_cpu_model() equals CPU_MODEL_SKYLAKE.
static bool is_intel_skylake() { return is_intel_family_core() &&
|
||||
extended_cpu_model() == CPU_MODEL_SKYLAKE; }
|
||||
|
||||
static int avx3_threshold();
|
||||
|
||||
static bool is_intel_tsc_synched_at_init() {
|
||||
if (is_intel_family_core()) {
|
||||
uint32_t ext_model = extended_cpu_model();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user