mirror of
https://github.com/openjdk/jdk.git
synced 2026-06-12 21:45:05 +00:00
8284960: Integration of JEP 426: Vector API (Fourth Incubator)
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org> Co-authored-by: Paul Sandoz <psandoz@openjdk.org> Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org> Co-authored-by: Smita Kamath <svkamath@openjdk.org> Co-authored-by: Joshua Zhu <jzhu@openjdk.org> Co-authored-by: Xiaohong Gong <xgong@openjdk.org> Co-authored-by: John R Rose <jrose@openjdk.org> Co-authored-by: Eric Liu <eliu@openjdk.org> Co-authored-by: Ningsheng Jian <njian@openjdk.org> Reviewed-by: ngasson, vlivanov, mcimadamore, jlahoda, kvn
This commit is contained in:
parent
171a7cdd5d
commit
6f6486e977
@ -2468,6 +2468,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
break;
|
||||
case Op_LoadVectorGather:
|
||||
case Op_StoreVectorScatter:
|
||||
case Op_CompressV:
|
||||
case Op_CompressM:
|
||||
case Op_ExpandV:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
@ -8658,7 +8661,6 @@ instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
|
||||
//
|
||||
|
||||
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountI src));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(INSN_COST * 13);
|
||||
@ -8680,7 +8682,6 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
|
||||
%}
|
||||
|
||||
instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountI (LoadI mem)));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(INSN_COST * 13);
|
||||
@ -8703,7 +8704,6 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
|
||||
|
||||
// Note: Long.bitCount(long) returns an int.
|
||||
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountL src));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(INSN_COST * 13);
|
||||
@ -8723,7 +8723,6 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
|
||||
%}
|
||||
|
||||
instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
|
||||
predicate(UsePopCountInstruction);
|
||||
match(Set dst (PopCountL (LoadL mem)));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(INSN_COST * 13);
|
||||
|
||||
@ -5683,14 +5683,58 @@ instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
|
||||
ins_pipe(vdop_fp128);
|
||||
%}
|
||||
|
||||
instruct vpopcount4I(vecX dst, vecX src) %{
|
||||
predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
|
||||
instruct vpopcountID(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() < 16);
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{
|
||||
"cnt $dst, $src\t# vector (16B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector (16B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector (8H)"
|
||||
ins_cost(3 * INSN_COST);
|
||||
format %{ "vpopcountI $dst, $src\t# vector (8B/4H/2S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
|
||||
as_FloatRegister($src$$reg));
|
||||
if (bt == T_SHORT || bt == T_INT) {
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
|
||||
as_FloatRegister($dst$$reg));
|
||||
if (bt == T_INT) {
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Population count for 16-byte vectors matched from PopCountVI. The element
// type (byte, short or int) is read from the node when the rule is encoded,
// and decides how many widening steps follow the per-byte count.
instruct vpopcountIX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (PopCountVI src));
|
||||
ins_cost(3 * INSN_COST);
|
||||
format %{ "vpopcountI $dst, $src\t# vector (16B/8H/4S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// Start with a bit count for every byte lane.
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($src$$reg));
|
||||
if (bt == T_SHORT || bt == T_INT) {
|
||||
// Pairwise-add adjacent byte counts to obtain per-halfword counts.
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($dst$$reg));
|
||||
if (bt == T_INT) {
|
||||
// One more pairwise add turns halfword counts into per-word counts.
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
||||
// type is T_INT. And once we have unified the type definition for
|
||||
// Vector API and auto-vectorization, this rule can be merged with
|
||||
// "vpopcountLX" rule.
|
||||
instruct vpopcountLD(vecD dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() < 16 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
|
||||
match(Set dst (PopCountVL src));
|
||||
ins_cost(5 * INSN_COST);
|
||||
format %{ "vpopcountL $dst, $src\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($src$$reg));
|
||||
@ -5698,24 +5742,28 @@ instruct vpopcount4I(vecX dst, vecX src) %{
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ xtn(as_FloatRegister($dst$$reg), __ T2S,
|
||||
as_FloatRegister($dst$$reg), __ T2D);
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
instruct vpopcount2I(vecD dst, vecD src) %{
|
||||
predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{
|
||||
"cnt $dst, $src\t# vector (8B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector (8B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector (4H)"
|
||||
%}
|
||||
instruct vpopcountLX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
|
||||
match(Set dst (PopCountVL src));
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "vpopcountL $dst, $src\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($src$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
|
||||
as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
@ -5921,3 +5969,131 @@ instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
//------------------------- CountLeadingZerosV -----------------------------
|
||||
|
||||
// Count leading zeros per element for 8-byte vectors (CountLeadingZerosV).
// A single vector clz is emitted with the arrangement derived from the
// element size.
instruct countLeadingZerosVD(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 8);
|
||||
match(Set dst (CountLeadingZerosV src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "countLeadingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// NOTE(review): the trailing `false` presumably selects the 64-bit
// (non-Q) arrangement, matching the 8B/4H/2S forms in the format string.
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
|
||||
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Count leading zeros per element for 16-byte vectors (CountLeadingZerosV).
// Byte/short/int elements use one vector clz. Long elements are handled one
// 64-bit lane at a time through the scalar clz, using rscratch1 as scratch.
instruct countLeadingZerosVX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (CountLeadingZerosV src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "countLeadingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
|
||||
if (bt != T_LONG) {
|
||||
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
|
||||
} else {
|
||||
// NOTE(review): presumably needed because the vector clz has no 2D
// form - confirm against the instruction reference.
// Lane 0: extract, count in the general-purpose register, insert.
__ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
|
||||
// Lane 1: same sequence. Only lane 0 of dst has been written so far,
// so lane 1 of src is still intact even if dst and src share a register.
__ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
//------------------------- CountTrailingZerosV ----------------------------
|
||||
|
||||
// Count trailing zeros per element for 8-byte vectors (CountTrailingZerosV).
// Implemented as a bit reversal followed by a leading-zero count: the
// trailing zeros of a value are the leading zeros of its bit-reversed form.
instruct countTrailingZerosVD(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 8);
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
ins_cost(3 * INSN_COST);
|
||||
format %{ "countTrailingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
|
||||
// Step 1: bit-reversed copy of src goes into dst.
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
|
||||
// Step 2: clz operates in place on the reversed value.
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Count trailing zeros per element for 16-byte vectors (CountTrailingZerosV).
// The source is bit-reversed into dst and leading zeros are then counted on
// dst. Long elements are counted lane by lane through the scalar clz, using
// rscratch1 as scratch.
instruct countTrailingZerosVX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
ins_cost(3 * INSN_COST);
|
||||
format %{ "countTrailingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
|
||||
// From here on dst holds the bit-reversed elements of src.
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
|
||||
if (bt != T_LONG) {
|
||||
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
|
||||
} else {
|
||||
// Both lanes are read from dst (the reversed value), not from src.
// Lane 0 is replaced before lane 1 is read; lane 1 is not touched by
// the lane-0 insert.
__ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 0);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
|
||||
__ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 1);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
//------------------------------ ReverseV -----------------------------------
|
||||
|
||||
// ReverseV for 8-byte vectors. The work is delegated to the
// neon_reverse_bits macro-assembler helper, which receives the element type
// and `false` as its last argument.
// NOTE(review): the last argument presumably selects the 64-bit vs 128-bit
// register form - confirm against the helper's declaration.
instruct vreverseD(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 8);
|
||||
match(Set dst (ReverseV src));
|
||||
ins_cost(2 * INSN_COST);
|
||||
format %{ "ReverseV $dst, $src\t# vector (D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ReverseV for 16-byte vectors. Same shape as the 8-byte rule, but the
// neon_reverse_bits helper is called with `true` as its last argument.
instruct vreverseX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (ReverseV src));
|
||||
ins_cost(2 * INSN_COST);
|
||||
format %{ "ReverseV $dst, $src\t# vector (X)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
//---------------------------- ReverseBytesV --------------------------------
|
||||
|
||||
// ReverseBytesV for 8-byte vectors. The work is delegated to the
// neon_reverse_bytes macro-assembler helper, which receives the element
// type and `false` as its last argument.
instruct vreverseBytesD(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 8);
|
||||
match(Set dst (ReverseBytesV src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "ReverseBytesV $dst, $src\t# vector (D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ReverseBytesV for 16-byte vectors. Same shape as the 8-byte rule, but the
// neon_reverse_bytes helper is called with `true` as its last argument.
instruct vreverseBytesX(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (ReverseBytesV src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "ReverseBytesV $dst, $src\t# vector (X)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -2445,28 +2445,50 @@ instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
|
||||
ins_pipe(vdop_fp128);
|
||||
%}
|
||||
dnl
|
||||
define(`VPOPCOUNT', `
|
||||
instruct vpopcount$1$2`'(vec$5 dst, vec$5 src) %{
|
||||
predicate(UsePopCountInstruction && n->as_Vector()->length() == $1);
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{
|
||||
"cnt $dst, $src\t# vector ($3B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector ($3B)\n\t"
|
||||
"uaddlp $dst, $dst\t# vector ($4H)"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T$3B,
|
||||
as_FloatRegister($src$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T$3B,
|
||||
define(`VPOPCOUNT', `dnl
|
||||
ifelse($1$2, `LD', `
|
||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
||||
// type is T_INT. And once we have unified the type definition for
|
||||
// Vector API and auto-vectorization, this rule can be merged with
|
||||
// "vpopcountLX" rule.', `')
|
||||
instruct vpopcount$1$2`'(vec$2 dst, vec$3 src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() $4 16`'ifelse($1$2, `LD', ` &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_INT', $1$2, `LX', ` &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
|
||||
match(Set dst (PopCountV$1 src));
|
||||
ins_cost($5 * INSN_COST);
|
||||
format %{ "vpopcount$1 $dst, $src\t# vector ($6)" %}
|
||||
ins_encode %{dnl
|
||||
ifelse($1, `I', `
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);', `')
|
||||
__ cnt(as_FloatRegister($dst$$reg), __ T`'ifelse($3, D, 8, 16)B,
|
||||
as_FloatRegister($src$$reg));dnl
|
||||
ifelse($1, `L', `
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T$4H,
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
|
||||
as_FloatRegister($dst$$reg));', `
|
||||
if (bt == T_SHORT || bt == T_INT) {
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 8, 16)B,
|
||||
as_FloatRegister($dst$$reg));
|
||||
if (bt == T_INT) {
|
||||
__ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 4, 8)H,
|
||||
as_FloatRegister($dst$$reg));
|
||||
}
|
||||
}')dnl
|
||||
ifelse($1$2, `LD', `
|
||||
__ xtn(as_FloatRegister($dst$$reg), __ T2S,
|
||||
as_FloatRegister($dst$$reg), __ T2D);', `')
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}')dnl
|
||||
dnl $1 $2 $3 $4 $5
|
||||
VPOPCOUNT(4, I, 16, 8, X)
|
||||
VPOPCOUNT(2, I, 8, 4, D)
|
||||
dnl $1 $2 $3 $4 $5 $6
|
||||
VPOPCOUNT(I, D, D, <, 3, 8B/4H/2S)
|
||||
VPOPCOUNT(I, X, X, ==, 3, 16B/8H/4S)
|
||||
VPOPCOUNT(L, D, X, <, 5, 2S)
|
||||
VPOPCOUNT(L, X, X, ==, 4, 2D)
|
||||
dnl
|
||||
dnl VMASK_TRUECOUNT($1, $2 )
|
||||
dnl VMASK_TRUECOUNT(suffix, reg)
|
||||
@ -2647,3 +2669,81 @@ instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl CLTZ_D($1 )
|
||||
dnl CLTZ_D(op_name)
|
||||
dnl Generates the 8-byte (vecD) rule named count<op_name>D that matches
dnl Count<op_name>. For TrailingZerosV the cost is tripled, src is first
dnl bit-reversed into dst, and clz then reads dst instead of src.
define(`CLTZ_D', `
|
||||
instruct count$1D(vecD dst, vecD src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 8);
|
||||
match(Set dst (Count$1 src));
|
||||
ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
|
||||
format %{ "count$1 $dst, $src\t# vector (8B/4H/2S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);dnl
|
||||
ifelse($1, `TrailingZerosV', `
|
||||
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);', `')
|
||||
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl CLTZ_X($1 )
|
||||
dnl CLTZ_X(op_name)
|
||||
dnl Generates the 16-byte (vecX) rule named count<op_name>X that matches
dnl Count<op_name>. Non-long element types use one vector clz. T_LONG is
dnl handled lane by lane through rscratch1 with the scalar clz. For
dnl TrailingZerosV the input is bit-reversed into dst first and every later
dnl read uses dst instead of src.
define(`CLTZ_X', `
|
||||
instruct count$1X(vecX dst, vecX src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == 16);
|
||||
match(Set dst (Count$1 src));
|
||||
ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
|
||||
format %{ "count$1 $dst, $src\t# vector (16B/8H/4S/2D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);dnl
|
||||
ifelse($1, `TrailingZerosV', `
|
||||
__ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);', `')
|
||||
if (bt != T_LONG) {
|
||||
__ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
|
||||
} else {
|
||||
__ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 0);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
|
||||
__ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 1);
|
||||
__ clz(rscratch1, rscratch1);
|
||||
__ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
//------------------------- CountLeadingZerosV -----------------------------
|
||||
CLTZ_D(LeadingZerosV)
|
||||
CLTZ_X(LeadingZerosV)
|
||||
|
||||
//------------------------- CountTrailingZerosV ----------------------------
|
||||
CLTZ_D(TrailingZerosV)
|
||||
CLTZ_X(TrailingZerosV)
|
||||
|
||||
dnl
|
||||
dnl REVERSE($1, $2, $3, $4 )
|
||||
dnl REVERSE(insn_name, op_name, type, insn)
|
||||
dnl Generates one reverse rule. The third argument (D or X) selects the
dnl operand class, the 8 vs 16 byte length predicate and the false vs true
dnl flag passed to the helper named by the fourth argument. ReverseV rules
dnl get twice the base instruction cost.
define(`REVERSE', `
|
||||
instruct $1(vec$3 dst, vec$3 src) %{
|
||||
predicate(n->as_Vector()->length_in_bytes() == ifelse($3, D, 8, 16));
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(ifelse($2, `ReverseV', `2 * ', `')INSN_COST);
|
||||
format %{ "$2 $dst, $src\t# vector ($3)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ $4(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, ifelse($3, D, false, true));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
//------------------------------ ReverseV -----------------------------------
|
||||
REVERSE(vreverseD, ReverseV, D, neon_reverse_bits)
|
||||
REVERSE(vreverseX, ReverseV, X, neon_reverse_bits)
|
||||
|
||||
//---------------------------- ReverseBytesV --------------------------------
|
||||
REVERSE(vreverseBytesD, ReverseBytesV, D, neon_reverse_bytes)
|
||||
REVERSE(vreverseBytesX, ReverseBytesV, X, neon_reverse_bytes)
|
||||
|
||||
@ -149,6 +149,8 @@ source %{
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
return Matcher::vector_size_supported(bt, vlen);
|
||||
case Op_ExpandV:
|
||||
if (UseSVE < 2 || is_subword_type(bt)) return false;
|
||||
case Op_VectorMaskToLong:
|
||||
if (vlen > 64) return false;
|
||||
default:
|
||||
@ -2199,14 +2201,83 @@ instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// popcount vector
|
||||
// vector popcount
|
||||
|
||||
instruct vpopcountI(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0);
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) (B/H/S)" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Unpredicated SVE popcount for PopCountVL nodes whose result element type
// is T_LONG. One sve_cnt on D-sized lanes governed by ptrue is emitted.
instruct vpopcountL(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector() &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
|
||||
match(Set dst (PopCountVL src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) (D)" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// When PopCountVL is generated by auto-vectorization, the basic type of
|
||||
// dst is T_INT. Once the type definitions of the Vector API and of
|
||||
// auto-vectorization have been unified, this rule can be merged with the
|
||||
// "vpopcountL" rule.
// Until then the rule counts bits on D-sized lanes and narrows the result
// to S-sized lanes, which needs dst as an early-defined temp and an extra
// vector temp.
|
||||
instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector() &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP_DEF dst, TEMP vtmp);
|
||||
ins_cost(3 * SVE_COST);
|
||||
format %{ "sve_cnt $dst, $src\n\t"
|
||||
"sve_dup $vtmp, #0\n\t"
|
||||
"sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
|
||||
ins_encode %{
|
||||
// Per-lane bit count on 64-bit lanes.
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
// Narrow D lanes to S lanes in place. Going by the format string above,
// the helper zeroes vtmp and combines it with dst via uzp1.
__ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
|
||||
as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector popcount - predicated
|
||||
|
||||
// Predicated SVE popcount for PopCountVI with a governing predicate input.
// Source and destination are the same operand (dst_src), and the element
// size of sve_cnt follows the node's element type.
// NOTE(review): sharing the operand presumably lets inactive lanes keep
// their input value - confirm against the sve_cnt predication semantics.
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVI dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (B/H/S)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_cnt(as_FloatRegister($dst_src$$reg), __ elemType_to_regVariant(bt),
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Predicated SVE popcount for PopCountVL with a governing predicate input,
// restricted to nodes whose result element type is T_LONG. Source and
// destination are the same operand (dst_src); sve_cnt runs on D-sized lanes.
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
|
||||
match(Set dst_src (PopCountVL dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) (D)" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt(as_FloatRegister($dst_src$$reg), __ D,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5767,6 +5838,104 @@ instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ---------------------------- Compress/Expand Operations ---------------------------
|
||||
|
||||
// Mask compress (CompressM): builds a predicate from the number of active
// lanes in pg. The lane count is computed into rscratch1 and a fresh
// predicate is then generated from the range zr .. rscratch1.
// NOTE(review): cr is declared killed, presumably because sve_whilelo
// updates the condition flags - confirm.
instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst (CompressM pg));
|
||||
effect(KILL cr);
|
||||
ins_cost(2 * SVE_COST);
|
||||
format %{ "sve_cntp rscratch1, $pg\n\t"
|
||||
"sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// rscratch1 = number of active lanes of pg, governed by ptrue.
__ sve_cntp(rscratch1, size, ptrue, as_PRegister($pg$$reg));
|
||||
// dst = predicate generated by whilelo for the range [zr, rscratch1).
__ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector compress (CompressV) for int, float, long and double elements.
// These element types map to a single sve_compact. Byte and short elements
// are matched by the separate vcompressB / vcompressS rules.
instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_compact(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg), as_PRegister($pg$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector compress (CompressV) for byte elements. The whole operation is
// delegated to the sve_compress_byte macro-assembler helper, which needs
// four vector temps and two predicate temps; dst is an early-defined temp.
// The format string names the helper rather than sve_compact, because this
// rule does not emit a single sve_compact (that is the S/D rule).
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
|
||||
pReg ptmp, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
|
||||
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(13 * SVE_COST);
|
||||
format %{ "sve_compress_byte $dst, $src, $pg\t# vector compress (B)" %}
|
||||
ins_encode %{
|
||||
__ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
|
||||
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg),
|
||||
as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg),
|
||||
as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector compress (CompressV) for short elements. The whole operation is
// delegated to the sve_compress_short macro-assembler helper, which needs
// two vector temps and one governing-predicate temp; dst is an
// early-defined temp.
// The format string names the helper rather than sve_compact, because this
// rule does not emit a single sve_compact (that is the S/D rule).
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
|
||||
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(38 * SVE_COST);
|
||||
format %{ "sve_compress_short $dst, $src, $pg\t# vector compress (H)" %}
|
||||
ins_encode %{
|
||||
__ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
|
||||
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector expand (ExpandV) for non-subword elements. The rule carries no
// predicate clause; instead the encoding asserts UseSVE == 2 and a
// non-subword element type. dst is an early-defined temp because it is
// overwritten as scratch before src is read by the final table lookup.
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
match(Set dst (ExpandV src pg));
|
||||
effect(TEMP_DEF dst);
|
||||
ins_cost(4 * SVE_COST);
|
||||
format %{ "sve_dup $dst, S/D, 0\n\t"
|
||||
"sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
|
||||
"sve_sub $dst, S/D, 1\n\t"
|
||||
"sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
|
||||
ins_encode %{
|
||||
// Example input:   src = 1 2 3 4 5 6 7 8
|
||||
//                  pg  = 1 0 0 1 1 0 1 1
|
||||
// Expected result: dst = 4 0 0 5 6 0 7 8
|
||||
|
||||
// The basic idea is to use TBL, which can shuffle the elements of the given
|
||||
// vector flexibly. HISTCNT + SUB generate the second source input of TBL,
|
||||
// whose values select the indexed elements from the src vector.
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// dst = 0 0 0 0 0 0 0 0
|
||||
__ sve_dup(as_FloatRegister($dst$$reg), size, 0);
|
||||
// dst = 5 0 0 4 3 0 2 1
|
||||
__ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
|
||||
// dst = 4 -1 -1 3 2 -1 1 0
|
||||
__ sve_sub(as_FloatRegister($dst$$reg), size, 1);
|
||||
// dst = 4 0 0 5 6 0 7 8
|
||||
__ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set pg (VectorMaskGen len));
|
||||
@ -5780,3 +5949,147 @@ instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ CountLeadingZerosV ------------------------------
|
||||
|
||||
// Unpredicated SVE count-leading-zeros (CountLeadingZerosV). One sve_clz
// governed by ptrue is emitted, with the lane size taken from the node's
// element type.
instruct vcountLeadingZeros(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst (CountLeadingZerosV src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_clz $dst, $src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Predicated count-leading-zeros. dst and src must be the same register so
|
||||
// that the inactive lanes of dst keep the corresponding elements of src.
|
||||
instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (CountLeadingZerosV dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_clz $dst_src, $pg, $dst_src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_clz(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ CountTrailingZerosV -----------------------------
|
||||
|
||||
// Unpredicated SVE count-trailing-zeros (CountTrailingZerosV). The bits of
// each lane are reversed into dst and leading zeros are then counted in
// place: the trailing zeros of a value are the leading zeros of its
// bit-reversed form.
instruct vcountTrailingZeros(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
ins_cost(2 * SVE_COST);
|
||||
format %{ "sve_rbit $dst, $src\n\t"
|
||||
"sve_clz $dst, $dst\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
|
||||
__ sve_clz(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Predicated count-trailing-zeros. dst and src must be the same register so
|
||||
// that the inactive lanes of dst keep the corresponding elements of src.
// Both the bit reversal and the leading-zero count are governed by pg.
|
||||
instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (CountTrailingZerosV dst_src pg));
|
||||
ins_cost(2 * SVE_COST);
|
||||
format %{ "sve_rbit $dst_src, $pg, $dst_src\n\t"
|
||||
"sve_clz $dst_src, $pg, $dst_src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
__ sve_clz(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ---------------------------------- ReverseV ------------------------------------
|
||||
|
||||
// Unpredicated SVE ReverseV. One sve_rbit governed by ptrue is emitted,
// with the lane size taken from the node's element type.
instruct vreverse(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst (ReverseV src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_rbit $dst, $src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Predicated ReverseV. dst and src must be the same register so that the
|
||||
// inactive lanes of dst keep the corresponding elements of src.
|
||||
instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (ReverseV dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_rbit $dst_src, $pg, $dst_src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -------------------------------- ReverseBytesV ---------------------------------
|
||||
|
||||
// Unpredicated ReverseBytesV rule (only matches nodes that are not
// predicated vectors). Non-byte element types use sve_revb governed by
// ptrue; byte elements are handled as a plain register copy.
instruct vreverseBytes(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst (ReverseBytesV src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_revb $dst, $src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
if (bt == T_BYTE) {
|
||||
// A single-byte element has nothing to swap: the result equals src, so
// only copy it (orr of src with itself) when dst is a different register.
if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
|
||||
__ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
|
||||
}
|
||||
} else {
|
||||
__ sve_revb(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Predicated ReverseBytesV rule: sve_revb governed by pg, performed in place.
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst keep the same elements as src.
|
||||
instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (ReverseBytesV dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_revb $dst_src, $pg, $dst_src\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
if (bt == T_BYTE) {
|
||||
// do nothing: dst and src are the same register, so byte elements
// are already in their final state and no instruction is emitted.
|
||||
} else {
|
||||
__ sve_revb(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
|
||||
@ -144,6 +144,8 @@ source %{
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
return Matcher::vector_size_supported(bt, vlen);
|
||||
case Op_ExpandV:
|
||||
if (UseSVE < 2 || is_subword_type(bt)) return false;
|
||||
case Op_VectorMaskToLong:
|
||||
if (vlen > 64) return false;
|
||||
default:
|
||||
@ -1172,18 +1174,75 @@ UNARY_OP_PREDICATE(vnegL, NegVL, D, sve_neg)
|
||||
UNARY_OP_PREDICATE(vnegF, NegVF, S, sve_fneg)
|
||||
UNARY_OP_PREDICATE(vnegD, NegVD, D, sve_fneg)
|
||||
|
||||
// popcount vector
|
||||
dnl
|
||||
dnl VPOPCOUNT($1, $2 )
|
||||
dnl VPOPCOUNT(name_suffix, size)
|
||||
define(`VPOPCOUNT', `
|
||||
instruct vpopcount$1(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector()`'ifelse($1, `L', ` &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
|
||||
match(Set dst (PopCountV$1 src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) ($2)" %}
|
||||
ins_encode %{dnl
|
||||
ifelse($1, `I', `
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);', `')
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector popcount
|
||||
VPOPCOUNT(I, B/H/S)
|
||||
VPOPCOUNT(L, D)
|
||||
|
||||
instruct vpopcountI(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
|
||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
||||
// type is T_INT. And once we have unified the type definition for
|
||||
// Vector API and auto-vectorization, this rule can be merged with
|
||||
// "vpopcountL" rule.
|
||||
instruct vpopcountLI(vReg dst, vReg src, vReg vtmp) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector() &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_INT);
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP_DEF dst, TEMP vtmp);
|
||||
ins_cost(3 * SVE_COST);
|
||||
format %{ "sve_cnt $dst, $src\n\t"
|
||||
"sve_dup $vtmp, #0\n\t"
|
||||
"sve_uzp1 $dst, $dst, $vtmp\t# vector (sve) (S)" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
__ sve_vector_narrow(as_FloatRegister($dst$$reg), __ S,
|
||||
as_FloatRegister($dst$$reg), __ D, as_FloatRegister($vtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl VPOPCOUNT_PREDICATE($1, $2 )
|
||||
dnl VPOPCOUNT_PREDICATE(name_suffix, size)
|
||||
define(`VPOPCOUNT_PREDICATE', `
|
||||
instruct vpopcount$1_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0`'ifelse($1, `L', ` &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
|
||||
match(Set dst_src (PopCountV$1 dst_src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cnt $dst_src, $pg, $dst_src\t# vector (sve) ($2)" %}
|
||||
ins_encode %{dnl
|
||||
ifelse($1, `I', `
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);', `')
|
||||
__ sve_cnt(as_FloatRegister($dst_src$$reg), ifelse($1, `I', `__ elemType_to_regVariant(bt)', `__ D'),
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
// vector popcount - predicated
|
||||
VPOPCOUNT_PREDICATE(I, B/H/S)
|
||||
VPOPCOUNT_PREDICATE(L, D)
|
||||
|
||||
// vector blend
|
||||
|
||||
instruct vblend(vReg dst, vReg src1, vReg src2, pRegGov pg) %{
|
||||
@ -3234,6 +3293,104 @@ instruct vloadmask_extend(pRegGov dst, vReg src, vReg tmp, rFlagsReg cr) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ---------------------------- Compress/Expand Operations ---------------------------
|
||||
|
||||
// CompressM rule: produces the compressed form of mask pg in dst.
// sve_cntp writes the count for pg (governed by ptrue) into rscratch1, and
// sve_whilelo then generates dst from zr and that count. rscratch1 is used
// as a scratch register and the flags register is declared killed.
instruct mcompress(pReg dst, pReg pg, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst (CompressM pg));
|
||||
effect(KILL cr);
|
||||
ins_cost(2 * SVE_COST);
|
||||
format %{ "sve_cntp rscratch1, $pg\n\t"
|
||||
"sve_whilelo $dst, zr, rscratch1\t# mask compress (B/H/S/D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_cntp(rscratch1, size, ptrue, as_PRegister($pg$$reg));
|
||||
__ sve_whilelo(as_PRegister($dst$$reg), size, zr, rscratch1);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// CompressV rule for int/float/long/double elements: a single sve_compact
// under pg. The predicate restricts the element type to these four types;
// byte and short vectors are matched by the separate vcompressB and
// vcompressS rules.
instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
(n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_compact $dst, $src, $pg\t# vector compress (S/D)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ sve_compact(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg), as_PRegister($pg$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// CompressV rule for byte elements. The whole sequence is produced by the
// sve_compress_byte macro-assembler helper, which needs four vector
// temporaries and two predicate temporaries (pgtmp is a governing
// predicate register). dst is TEMP_DEF and all temporaries are TEMP.
// The format string prints a single sve_compact line as a summary only.
// NOTE(review): this rule is costed at 13 * SVE_COST while the short rule
// (vcompressS) is costed at 38 * SVE_COST, although sve_compress_byte calls
// sve_compress_short twice - confirm the costs are intended.
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, vReg vtmp3, vReg vtmp4,
|
||||
pReg ptmp, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
|
||||
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP ptmp, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(13 * SVE_COST);
|
||||
format %{ "sve_compact $dst, $src, $pg\t# vector compress (B)" %}
|
||||
ins_encode %{
|
||||
__ sve_compress_byte(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
|
||||
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg),
|
||||
as_FloatRegister($vtmp3$$reg),as_FloatRegister($vtmp4$$reg),
|
||||
as_PRegister($ptmp$$reg), as_PRegister($pgtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// CompressV rule for short elements. The whole sequence is produced by the
// sve_compress_short macro-assembler helper, which needs two vector
// temporaries and one governing predicate temporary. dst is TEMP_DEF and
// all temporaries are TEMP.
// The format string prints a single sve_compact line as a summary only.
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg vtmp1, vReg vtmp2, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
|
||||
effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
ins_cost(38 * SVE_COST);
|
||||
format %{ "sve_compact $dst, $src, $pg\t# vector compress (H)" %}
|
||||
ins_encode %{
|
||||
__ sve_compress_short(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_PRegister($pg$$reg),
|
||||
as_FloatRegister($vtmp1$$reg),as_FloatRegister($vtmp2$$reg), as_PRegister($pgtmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ExpandV rule for S/D-sized elements, implemented as DUP + HISTCNT + SUB + TBL.
// dst is TEMP_DEF because it is written before src is read for the last time.
// NOTE(review): this rule has no predicate() of its own; the encoding asserts
// UseSVE == 2 and a non-subword element type, so it presumably relies on the
// matcher's rule-supported check to reject other configurations - confirm.
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
match(Set dst (ExpandV src pg));
|
||||
effect(TEMP_DEF dst);
|
||||
ins_cost(4 * SVE_COST);
|
||||
format %{ "sve_dup $dst, S/D, 0\n\t"
|
||||
"sve_histcnt $dst, S/D, $pg, $dst, $dst\n\t"
|
||||
"sve_sub $dst, S/D, 1\n\t"
|
||||
"sve_tbl $dst, S/D, $src, $dst\t# vector expand (S/D)" %}
|
||||
ins_encode %{
|
||||
// Example input:   src = 1 2 3 4 5 6 7 8
|
||||
//                  pg  = 1 0 0 1 1 0 1 1
|
||||
// Expected result: dst = 4 0 0 5 6 0 7 8
|
||||
|
||||
// The basic idea is to use TBL which can shuffle the elements in the given
|
||||
// vector flexibly. HISTCNT + SUB is used to generate the second source input
|
||||
// for TBL whose value is used to select the indexed element from src vector.
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// dst = 0 0 0 0 0 0 0 0
|
||||
__ sve_dup(as_FloatRegister($dst$$reg), size, 0);
|
||||
// All elements are equal (zero), so each active lane receives a running
// count of the active lanes seen so far; inactive lanes stay zero.
// dst = 5 0 0 4 3 0 2 1
|
||||
__ sve_histcnt(as_FloatRegister($dst$$reg), size, as_PRegister($pg$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
|
||||
// Turn the 1-based counts into 0-based indices; inactive lanes become -1.
// dst = 4 -1 -1 3 2 -1 1 0
|
||||
__ sve_sub(as_FloatRegister($dst$$reg), size, 1);
|
||||
// Per the example, lanes whose index is -1 end up as zero.
// dst = 4 0 0 5 6 0 7 8
|
||||
__ sve_tbl(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($dst$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set pg (VectorMaskGen len));
|
||||
@ -3247,3 +3404,79 @@ instruct vmask_gen(pRegGov pg, iRegL len, rFlagsReg cr) %{
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl BITWISE_UNARY($1, $2, $3 )
|
||||
dnl BITWISE_UNARY(insn_name, op_name, insn)
|
||||
define(`BITWISE_UNARY', `
|
||||
instruct $1(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 &&
|
||||
!n->as_Vector()->is_predicated_vector());
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
|
||||
format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst, $src\n\t"
|
||||
"$3 $dst, $dst', `"$3 $dst, $src')\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
|
||||
ifelse($2, `CountTrailingZerosV', `
|
||||
__ sve_rbit(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));', `')dnl
|
||||
ifelse($2, `ReverseBytesV', `
|
||||
if (bt == T_BYTE) {
|
||||
if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
|
||||
__ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
|
||||
}
|
||||
} else {
|
||||
__ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($src$$reg));
|
||||
}', `
|
||||
__ $3(as_FloatRegister($dst$$reg), size, ptrue, as_FloatRegister($ifelse($2, `CountTrailingZerosV', dst, src)$$reg));')
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl BITWISE_UNARY_PREDICATE($1, $2, $3 )
|
||||
dnl BITWISE_UNARY_PREDICATE(insn_name, op_name, insn)
|
||||
define(`BITWISE_UNARY_PREDICATE', `
|
||||
// The dst and src should use the same register to make sure the
|
||||
// inactive lanes in dst save the same elements as src.
|
||||
instruct $1_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src ($2 dst_src pg));
|
||||
ins_cost(ifelse($2, `CountTrailingZerosV', `2 * ', `')SVE_COST);
|
||||
format %{ ifelse($2, `CountTrailingZerosV', `"sve_rbit $dst_src, $pg, $dst_src\n\t"
|
||||
"$3 $dst_src, $pg, $dst_src', `"$3 $dst_src, $pg, $dst_src')\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);dnl
|
||||
ifelse($2, `CountTrailingZerosV', `
|
||||
__ sve_rbit(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));', `')dnl
|
||||
ifelse($2, `ReverseBytesV', `
|
||||
if (bt == T_BYTE) {
|
||||
// do nothing
|
||||
} else {
|
||||
__ $3(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));
|
||||
}', `
|
||||
__ $3(as_FloatRegister($dst_src$$reg), size,
|
||||
as_PRegister($pg$$reg), as_FloatRegister($dst_src$$reg));')
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// ------------------------------ CountLeadingZerosV ------------------------------
|
||||
BITWISE_UNARY(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
|
||||
BITWISE_UNARY_PREDICATE(vcountLeadingZeros, CountLeadingZerosV, sve_clz)
|
||||
|
||||
// ------------------------------ CountTrailingZerosV -----------------------------
|
||||
BITWISE_UNARY(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
|
||||
BITWISE_UNARY_PREDICATE(vcountTrailingZeros, CountTrailingZerosV, sve_clz)
|
||||
|
||||
// ---------------------------------- ReverseV ------------------------------------
|
||||
BITWISE_UNARY(vreverse, ReverseV, sve_rbit)
|
||||
BITWISE_UNARY_PREDICATE(vreverse, ReverseV, sve_rbit)
|
||||
|
||||
// -------------------------------- ReverseBytesV ---------------------------------
|
||||
BITWISE_UNARY(vreverseBytes, ReverseBytesV, sve_revb)
|
||||
BITWISE_UNARY_PREDICATE(vreverseBytes, ReverseBytesV, sve_revb)
|
||||
|
||||
|
||||
@ -3134,6 +3134,7 @@ public:
|
||||
INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
|
||||
INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
|
||||
INSN(sve_bic, 0b00000100, 0b011011000); // vector bitwise clear
|
||||
INSN(sve_clz, 0b00000100, 0b011001101); // vector count leading zero bits
|
||||
INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits
|
||||
INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
|
||||
INSN(sve_eor, 0b00000100, 0b011001000); // vector eor
|
||||
@ -3793,6 +3794,19 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||
INSN(sve_lastb, 0b1);
|
||||
#undef INSN
|
||||
|
||||
// SVE reverse within elements
// INSN(NAME, opc, cond) defines an emitter NAME(Zd, T, Pg, Zn):
//   opc  - 2-bit opcode placed in bits 17..16 of the instruction word
//   cond - expression over the size variant T that is asserted ("invalid size")
// The element size T goes into bits 23..22, Pg is encoded at bit 10, Zn at
// bit 5 and Zd at bit 0.
|
||||
#define INSN(NAME, opc, cond) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) { \
|
||||
starti; \
|
||||
assert(cond, "invalid size"); \
|
||||
f(0b00000101, 31, 24), f(T, 23, 22), f(0b1001, 21, 18), f(opc, 17, 16); \
|
||||
f(0b100, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0); \
|
||||
}
|
||||
|
||||
// sve_revb accepts only H, S and D element sizes; sve_rbit accepts every size except Q.
INSN(sve_revb, 0b00, T == H || T == S || T == D);
|
||||
INSN(sve_rbit, 0b11, T != Q);
|
||||
#undef INSN
|
||||
|
||||
// SVE Create index starting from general-purpose register and incremented by immediate
|
||||
void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
|
||||
starti;
|
||||
@ -3819,6 +3833,23 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||
f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// Shuffle active elements of vector to the right and fill with zero
// Zd - destination vector, Zn - source vector, Pg - predicate selecting the
// active elements. Only the S and D element sizes are accepted (asserted).
|
||||
void sve_compact(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, PRegister Pg) {
|
||||
starti;
|
||||
assert(T == S || T == D, "invalid size");
|
||||
f(0b00000101, 31, 24), f(T, 23, 22), f(0b100001100, 21, 13);
|
||||
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// SVE2 Count matching elements in vector
// Zd - destination vector, Pg - governing predicate, Zn/Zm - the two source
// vectors. Only the S and D element sizes are accepted (asserted).
|
||||
void sve_histcnt(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
|
||||
FloatRegister Zn, FloatRegister Zm) {
|
||||
starti;
|
||||
assert(T == S || T == D, "invalid size");
|
||||
f(0b01000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
|
||||
f(0b110, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// SVE2 bitwise permute
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
|
||||
|
||||
@ -1113,10 +1113,12 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
|
||||
sve_uzp1(dst, S, src, tmp);
|
||||
break;
|
||||
case H:
|
||||
assert_different_registers(dst, tmp);
|
||||
sve_uzp1(dst, S, src, tmp);
|
||||
sve_uzp1(dst, H, dst, tmp);
|
||||
break;
|
||||
case B:
|
||||
assert_different_registers(dst, tmp);
|
||||
sve_uzp1(dst, S, src, tmp);
|
||||
sve_uzp1(dst, H, dst, tmp);
|
||||
sve_uzp1(dst, B, dst, tmp);
|
||||
@ -1128,6 +1130,7 @@ void C2_MacroAssembler::sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst
|
||||
if (dst_size == H) {
|
||||
sve_uzp1(dst, H, src, tmp);
|
||||
} else { // B
|
||||
assert_different_registers(dst, tmp);
|
||||
sve_uzp1(dst, H, src, tmp);
|
||||
sve_uzp1(dst, B, dst, tmp);
|
||||
}
|
||||
@ -1311,6 +1314,154 @@ void C2_MacroAssembler::sve_ptrue_lanecnt(PRegister dst, SIMD_RegVariant size, i
|
||||
}
|
||||
}
|
||||
|
||||
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
|
||||
// Any remaining elements of dst will be filled with zero.
|
||||
// The elements are handled as shorts (H): each half of src is widened to
// ints, compacted with sve_compact, narrowed back, and the two compacted
// halves are then merged.
// Register constraints (asserted): dst, src, vtmp1 and vtmp2 are all distinct,
// mask and pgtmp are distinct, and pgtmp is a governing predicate register.
// Clobbers: rscratch1
|
||||
// Preserves: src, mask
|
||||
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
PRegister pgtmp) {
|
||||
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
|
||||
assert_different_registers(dst, src, vtmp1, vtmp2);
|
||||
assert_different_registers(mask, pgtmp);
|
||||
|
||||
// Example input:   src   = 8888 7777 6666 5555 4444 3333 2222 1111
|
||||
//                  mask  = 0001 0000 0000 0001 0001 0000 0001 0001
|
||||
// Expected result: dst   = 0000 0000 0000 8888 5555 4444 2222 1111
|
||||
// vtmp2 holds zero for both narrowing steps below; it is only reused as
// the index vector after the second narrowing.
sve_dup(vtmp2, H, 0);
|
||||
|
||||
// Extend lowest half to type INT.
|
||||
// dst = 00004444 00003333 00002222 00001111
|
||||
sve_uunpklo(dst, S, src);
|
||||
// pgtmp = 00000001 00000000 00000001 00000001
|
||||
sve_punpklo(pgtmp, mask);
|
||||
// Pack the active elements in size of type INT to the right,
|
||||
// and fill the remaining lanes with zero.
|
||||
// dst = 00000000 00004444 00002222 00001111
|
||||
sve_compact(dst, S, dst, pgtmp);
|
||||
// Narrow the result back to type SHORT.
|
||||
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
|
||||
sve_uzp1(dst, H, dst, vtmp2);
|
||||
// Count the active elements of lowest half.
|
||||
// rscratch1 = 3
|
||||
sve_cntp(rscratch1, S, ptrue, pgtmp);
|
||||
|
||||
// Repeat for the highest half.
|
||||
// pgtmp = 00000001 00000000 00000000 00000001
|
||||
sve_punpkhi(pgtmp, mask);
|
||||
// vtmp1 = 00008888 00007777 00006666 00005555
|
||||
sve_uunpkhi(vtmp1, S, src);
|
||||
// vtmp1 = 00000000 00000000 00008888 00005555
|
||||
sve_compact(vtmp1, S, vtmp1, pgtmp);
|
||||
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
|
||||
sve_uzp1(vtmp1, H, vtmp1, vtmp2);
|
||||
|
||||
// Compressed low:   dst   = 0000 0000 0000 0000 0000 4444 2222 1111
|
||||
// Compressed high:  vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
|
||||
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
|
||||
// TRUE_CNT is the number of active elements in the compressed low.
|
||||
neg(rscratch1, rscratch1);
|
||||
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
|
||||
sve_index(vtmp2, H, rscratch1, 1);
|
||||
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
|
||||
sve_tbl(vtmp1, H, vtmp1, vtmp2);
|
||||
|
||||
// Combine the compressed high (after the shift) with the compressed low.
|
||||
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
|
||||
sve_orr(dst, dst, vtmp1);
|
||||
}
|
||||
|
||||
// Byte variant of the compress operation: pack the active elements of src,
// under the control of mask, into the lowest-numbered elements of dst and
// fill the remaining elements of dst with zero (see the example below).
// Each half of src is widened to shorts and compressed with
// sve_compress_short, then narrowed back and the two halves are merged.
// Register constraints (asserted): dst, src and vtmp1..vtmp4 are all distinct,
// mask, ptmp and pgtmp are all distinct, and pgtmp is a governing predicate
// register.
// Clobbers: rscratch1, rscratch2
|
||||
// Preserves: src, mask
|
||||
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4,
|
||||
PRegister ptmp, PRegister pgtmp) {
|
||||
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
|
||||
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
|
||||
assert_different_registers(mask, ptmp, pgtmp);
|
||||
// Example input:   src   = 88 77 66 55 44 33 22 11
|
||||
//                  mask  = 01 00 00 01 01 00 01 01
|
||||
// Expected result: dst   = 00 00 00 88 55 44 22 11
|
||||
|
||||
sve_dup(vtmp4, B, 0);
|
||||
// Extend lowest half to type SHORT.
|
||||
// vtmp1 = 0044 0033 0022 0011
|
||||
sve_uunpklo(vtmp1, H, src);
|
||||
// ptmp = 0001 0000 0001 0001
|
||||
sve_punpklo(ptmp, mask);
|
||||
// Count the active elements of lowest half.
|
||||
// The count is kept in rscratch2 because sve_compress_short clobbers rscratch1.
// rscratch2 = 3
|
||||
sve_cntp(rscratch2, H, ptrue, ptmp);
|
||||
// Pack the active elements in size of type SHORT to the right,
|
||||
// and fill the remaining lanes with zero.
|
||||
// dst = 0000 0044 0022 0011
|
||||
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
|
||||
// Narrow the result back to type BYTE.
|
||||
// dst = 00 00 00 00 00 44 22 11
|
||||
sve_uzp1(dst, B, dst, vtmp4);
|
||||
|
||||
// Repeat for the highest half.
|
||||
// ptmp = 0001 0000 0000 0001
|
||||
sve_punpkhi(ptmp, mask);
|
||||
// vtmp2 = 0088 0077 0066 0055
|
||||
sve_uunpkhi(vtmp2, H, src);
|
||||
// vtmp1 = 0000 0000 0088 0055
|
||||
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
|
||||
|
||||
// vtmp4 was passed to sve_compress_short as a temporary and overwritten
// there, so it has to be zeroed again before the narrowing below.
sve_dup(vtmp4, B, 0);
|
||||
// vtmp1 = 00 00 00 00 00 00 88 55
|
||||
sve_uzp1(vtmp1, B, vtmp1, vtmp4);
|
||||
|
||||
// Compressed low:   dst   = 00 00 00 00 00 44 22 11
|
||||
// Compressed high:  vtmp1 = 00 00 00 00 00 00 88 55
|
||||
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
|
||||
// TRUE_CNT is the number of active elements in the compressed low.
|
||||
neg(rscratch2, rscratch2);
|
||||
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
|
||||
sve_index(vtmp2, B, rscratch2, 1);
|
||||
// vtmp1 = 00 00 00 88 55 00 00 00
|
||||
sve_tbl(vtmp1, B, vtmp1, vtmp2);
|
||||
// Combine the compressed high (after the shift) with the compressed low.
|
||||
// dst = 00 00 00 88 55 44 22 11
|
||||
sve_orr(dst, dst, vtmp1);
|
||||
}
|
||||
|
||||
// Reverse the bits of every element of src into dst using NEON instructions.
//   bt  - element type; must be T_BYTE, T_SHORT, T_INT or T_LONG (asserted)
//   isQ - selects the 16-byte (T16B) arrangement when true, else the 8-byte (T8B) one
// Byte elements need a single rbit. For wider elements the bytes of each
// element are reversed first (neon_reverse_bytes) and rbit is then applied
// on the byte arrangement.
void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
|
||||
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported basic type");
|
||||
SIMD_Arrangement size = isQ ? T16B : T8B;
|
||||
if (bt == T_BYTE) {
|
||||
rbit(dst, size, src);
|
||||
} else {
|
||||
neon_reverse_bytes(dst, src, bt, isQ);
|
||||
rbit(dst, size, dst);
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse the bytes of every element of src into dst using NEON instructions.
//   bt  - element type; must be T_BYTE, T_SHORT, T_INT or T_LONG (asserted)
//   isQ - selects the 16-byte (T16B) arrangement when true, else the 8-byte (T8B) one
// The instruction is chosen by element type: rev16 for shorts, rev32 for
// ints and rev64 for longs. Byte elements are only copied.
void C2_MacroAssembler::neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
|
||||
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported basic type");
|
||||
SIMD_Arrangement size = isQ ? T16B : T8B;
|
||||
switch (bt) {
|
||||
case T_BYTE:
|
||||
// Nothing to swap inside a single byte: just move src to dst
// (orr of src with itself) when the registers differ.
if (dst != src) {
|
||||
orr(dst, size, src, src);
|
||||
}
|
||||
break;
|
||||
case T_SHORT:
|
||||
rev16(dst, size, src);
|
||||
break;
|
||||
case T_INT:
|
||||
rev32(dst, size, src);
|
||||
break;
|
||||
case T_LONG:
|
||||
rev64(dst, size, src);
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
// Extract a scalar element from an sve vector at position 'idx'.
|
||||
// The input elements in src are expected to be of integral type.
|
||||
void C2_MacroAssembler::sve_extract_integral(Register dst, SIMD_RegVariant size, FloatRegister src, int idx,
|
||||
|
||||
@ -105,4 +105,20 @@
|
||||
FloatRegister tmp2, PRegister ptmp,
|
||||
SIMD_RegVariant T);
|
||||
|
||||
// Pack active elements of src, under the control of mask, into the
|
||||
// lowest-numbered elements of dst. Any remaining elements of dst will
|
||||
// be filled with zero.
|
||||
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4,
|
||||
PRegister ptmp, PRegister pgtmp);
|
||||
|
||||
void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
PRegister pgtmp);
|
||||
|
||||
void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
|
||||
|
||||
void neon_reverse_bytes(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
|
||||
|
||||
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
|
||||
@ -4838,18 +4838,54 @@ void Assembler::popcntl(Register dst, Register src) {
|
||||
emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
// Emits the EVEX-encoded, opmask-controlled byte population count in
// register-to-register form (opcode 0x54, 66 0F38 map, vex_w = false).
//   mask       - opmask register embedded in the EVEX prefix
//   merge      - when true the clear-context attribute is reset
//                (presumably selecting merge- instead of zero-masking - confirm)
//   vector_len - must be AVX_512bit unless AVX512VL is supported (asserted)
// Requires AVX512_BITALG (asserted).
void Assembler::evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512_bitalg(), "must support avx512bitalg feature");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
attributes.set_is_evex_instruction();
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x54, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the EVEX-encoded, opmask-controlled word population count in
// register-to-register form (opcode 0x54, 66 0F38 map, vex_w = true; the
// byte variant uses the same opcode with vex_w = false).
//   mask       - opmask register embedded in the EVEX prefix
//   merge      - when true the clear-context attribute is reset
//                (presumably selecting merge- instead of zero-masking - confirm)
//   vector_len - must be AVX_512bit unless AVX512VL is supported (asserted)
// Requires AVX512_BITALG (asserted).
void Assembler::evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512_bitalg(), "must support avx512bitalg feature");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x54, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the EVEX-encoded, opmask-controlled doubleword population count in
// register-to-register form (opcode 0x55, 66 0F38 map, vex_w = false).
//   mask       - opmask register embedded in the EVEX prefix
//   merge      - when true the clear-context attribute is reset
//                (presumably selecting merge- instead of zero-masking - confirm)
//   vector_len - must be AVX_512bit unless AVX512VL is supported (asserted)
// Requires AVX512_VPOPCNTDQ (asserted).
void Assembler::evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x55, (0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpopcntq(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
void Assembler::evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x55, (0xC0 | encode));
|
||||
}
|
||||
@ -7894,6 +7930,32 @@ void Assembler::evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, in
|
||||
emit_int16(0x14, (unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the EVEX-encoded, opmask-controlled doubleword leading-zero count in
// register-to-register form (opcode 0x44, 66 0F38 map, vex_w = false).
//   mask       - opmask register embedded in the EVEX prefix
//   merge      - when true the clear-context attribute is reset
//                (presumably selecting merge- instead of zero-masking - confirm)
//   vector_len - must be AVX_512bit unless AVX512VL is supported (asserted)
// Requires AVX512CD (asserted).
void Assembler::evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512cd(), "");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x44, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the EVEX-encoded, opmask-controlled quadword leading-zero count in
// register-to-register form (opcode 0x44, 66 0F38 map, vex_w = true; the
// doubleword variant uses the same opcode with vex_w = false).
//   mask       - opmask register embedded in the EVEX prefix
//   merge      - when true the clear-context attribute is reset
//                (presumably selecting merge- instead of zero-masking - confirm)
//   vector_len - must be AVX_512bit unless AVX512VL is supported (asserted)
// Requires AVX512CD (asserted).
void Assembler::evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_avx512cd(), "");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
if (merge) {
|
||||
attributes.reset_is_clear_context();
|
||||
}
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x44, (0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "requires EVEX support");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
|
||||
@ -7930,6 +7992,84 @@ void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegis
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evexpandps:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x88, vex_w cleared.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x88, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evexpandpd:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x88, vex_w set.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x88, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpexpandb:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x62, vex_w cleared.
// Requires AVX512_VBMI2.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi2(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x62, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpexpandw:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x62, vex_w set.
// Requires AVX512_VBMI2.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi2(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16(0x62, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpexpandd:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x89, vex_w cleared.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x89, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpexpandq:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x89, vex_w set.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attrs.reset_is_clear_context();
  }
  const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x89, (0xC0 | enc));
}
|
||||
|
||||
// vinserti forms
|
||||
|
||||
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
@ -7973,7 +8113,7 @@ void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
}
|
||||
|
||||
void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionMark im(this);
|
||||
@ -8032,7 +8172,7 @@ void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8
|
||||
}
|
||||
|
||||
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -8045,7 +8185,7 @@ void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
}
|
||||
|
||||
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
assert(imm8 <= 0x03, "imm8: %u", imm8);
|
||||
InstructionMark im(this);
|
||||
@ -8346,6 +8486,20 @@ void Assembler::vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
|
||||
emit_int16((unsigned char)0xF6, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the three-operand register form of vpunpckhwd: 66 prefix, 0F opcode
// map, opcode byte 0x69, no opmask register.
//   dst        - destination vector register
//   nds        - first source vector register
//   src        - second source vector register
//   vector_len - vector length encoding
void Assembler::vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x69, (0xC0 | enc));
}
|
||||
|
||||
// Emits the three-operand register form of vpunpcklwd: 66 prefix, 0F opcode
// map, opcode byte 0x61, no opmask register.
//   dst        - destination vector register
//   nds        - first source vector register
//   src        - second source vector register
//   vector_len - vector length encoding
void Assembler::vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x61, (0xC0 | enc));
}
|
||||
|
||||
void Assembler::vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX > 0, "requires some form of AVX");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
@ -9862,6 +10016,14 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Emits the register form of vgf2p8affineqb: 66 prefix, 0F 3A opcode map,
// opcode byte 0xCE, vex_w set, followed by an immediate byte. Requires GFNI.
//   dst        - destination vector register
//   src2       - first source vector register
//   src3       - second source vector register
//   imm8       - immediate emitted as the trailing byte of the instruction
//   vector_len - vector length encoding
void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
  assert(VM_Version::supports_gfni(), "requires GFNI support");
  assert(VM_Version::supports_sse(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  const int enc = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  // Opcode, register-direct operand byte, then the immediate.
  emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | enc), imm8);
}
|
||||
|
||||
// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX >= 2, "");
|
||||
@ -11555,6 +11717,85 @@ void Assembler::evpmovm2b(XMMRegister dst, KRegister src, int vector_len) {
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int16(0x28, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpcompressb:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x63, vex_w cleared.
// Requires AVX512_VBMI2.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi2(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x63, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpcompressw:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x63, vex_w set.
// Requires AVX512_VBMI2.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi2(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x63, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpcompressd:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x8B, vex_w cleared.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x8B, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evpcompressq:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x8B, vex_w set.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x8B, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evcompressps:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x8A, vex_w cleared.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x8A, (0xC0 | enc));
}
|
||||
|
||||
// Emits the register-to-register, opmask-predicated form of evcompresspd:
// EVEX encoded, 66 prefix, 0F 38 opcode map, opcode byte 0x8A, vex_w set.
//   dst        - destination vector register
//   mask       - opmask register embedded into the encoding
//   src        - source vector register
//   merge      - when true the clear-context attribute is reset before encoding
//                (presumably selecting merge-masking instead of zero-masking)
//   vector_len - vector length encoding; anything other than AVX_512bit needs AVX512VL
void Assembler::evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.set_is_evex_instruction();
  if (merge) {
    attrs.reset_is_clear_context();
  }
  // Operand order handed to the encoder is deliberate: src goes in the first
  // slot and dst in the third.
  const int enc = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int16((unsigned char)0x8A, (0xC0 | enc));
}
|
||||
|
||||
#ifndef _LP64
|
||||
|
||||
void Assembler::incl(Register dst) {
|
||||
|
||||
@ -1878,8 +1878,10 @@ private:
|
||||
void popcntl(Register dst, Address src);
|
||||
void popcntl(Register dst, Register src);
|
||||
|
||||
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
#ifdef _LP64
|
||||
void popcntq(Register dst, Address src);
|
||||
@ -1945,6 +1947,12 @@ private:
|
||||
void punpckldq(XMMRegister dst, Address src);
|
||||
void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
// Interleave High Word
|
||||
void vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
// Interleave Low Word
|
||||
void vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
// Interleave High Doublewords
|
||||
void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
@ -2206,9 +2214,8 @@ private:
|
||||
void shrxq(Register dst, Register src1, Register src2);
|
||||
|
||||
void bzhiq(Register dst, Register src1, Register src2);
|
||||
void pdep(Register dst, Register src1, Register src2);
|
||||
void pext(Register dst, Register src1, Register src2);
|
||||
|
||||
void pdep(Register dst, Register src1, Register src2);
|
||||
|
||||
//====================VECTOR ARITHMETIC=====================================
|
||||
// Add Packed Floating-Point Values
|
||||
@ -2437,6 +2444,8 @@ private:
|
||||
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
|
||||
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
|
||||
|
||||
void evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
// Sub packed integers
|
||||
void psubb(XMMRegister dst, XMMRegister src);
|
||||
@ -2581,6 +2590,21 @@ private:
|
||||
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
|
||||
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
|
||||
|
||||
// Vector compress/expand instructions.
|
||||
void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
void evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
void evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
// Vector Rotate Left/Right instruction.
|
||||
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
@ -2737,6 +2761,10 @@ private:
|
||||
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
// Galois field affine transformation instructions.
|
||||
void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
|
||||
|
||||
protected:
|
||||
// Next instructions require address alignment 16 bytes SSE mode.
|
||||
// They should be called only from corresponding MacroAssembler instructions.
|
||||
|
||||
@ -2292,7 +2292,7 @@ void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister sr
|
||||
case T_FLOAT: vaddps(dst, src1, src2, vlen_enc); return;
|
||||
case T_LONG: vpaddq(dst, src1, src2, vlen_enc); return;
|
||||
case T_DOUBLE: vaddpd(dst, src1, src2, vlen_enc); return;
|
||||
default: assert(false, "%s", type2name(elem_bt));
|
||||
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2309,7 +2309,7 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
|
||||
case T_SHORT: evpbroadcastw(dst, src, vlen_enc); return;
|
||||
case T_FLOAT: case T_INT: evpbroadcastd(dst, src, vlen_enc); return;
|
||||
case T_DOUBLE: case T_LONG: evpbroadcastq(dst, src, vlen_enc); return;
|
||||
default: assert(false, "%s", type2name(elem_bt));
|
||||
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
|
||||
}
|
||||
} else {
|
||||
assert(vlen_enc != Assembler::AVX_512bit, "required");
|
||||
@ -2321,7 +2321,7 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register
|
||||
case T_FLOAT: movdl(dst, src); vbroadcastss(dst, dst, vlen_enc); return;
|
||||
case T_LONG: movdq(dst, src); vpbroadcastq(dst, dst, vlen_enc); return;
|
||||
case T_DOUBLE: movdq(dst, src); vbroadcastsd(dst, dst, vlen_enc); return;
|
||||
default: assert(false, "%s", type2name(elem_bt));
|
||||
default: fatal("Unsupported type %s", type2name(elem_bt)); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2348,7 +2348,9 @@ void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMR
|
||||
vcvtdq2pd(dst, dst, vlen_enc);
|
||||
break;
|
||||
}
|
||||
default: assert(false, "%s", type2name(to_elem_bt));
|
||||
default:
|
||||
fatal("Unsupported type %s", type2name(to_elem_bt));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4496,6 +4498,71 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
|
||||
|
||||
vector_mask_operation_helper(opc, dst, tmp, masklen);
|
||||
}
|
||||
|
||||
// Computes the compressed form of an opmask.
//   dst      - opmask register receiving the result
//   src      - opmask register holding the input mask
//   rtmp1    - scratch general purpose register (clobbered)
//   rtmp2    - scratch general purpose register (clobbered)
//   mask_len - number of low-order mask bits that are significant; the shift
//              below assumes 1 <= mask_len <= 64 (NOTE(review): confirm callers)
void C2_MacroAssembler::vector_mask_compress(KRegister dst, KRegister src, Register rtmp1,
                                             Register rtmp2, int mask_len) {
  // Copy the mask into a general purpose register and keep only its low
  // mask_len bits.
  kmov(rtmp1, src);
  andq(rtmp1, (0xFFFFFFFFFFFFFFFFUL >> (64 - mask_len)));

  // Run pext over an all-ones value using the trimmed mask as selector
  // (presumably yielding one contiguous low-order set bit per set mask bit).
  mov64(rtmp2, -1L);
  pext(rtmp2, rtmp2, rtmp1);

  // Move the result back into an opmask register.
  kmov(dst, rtmp2);
}
|
||||
|
||||
void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
|
||||
bool merge, BasicType bt, int vec_enc) {
|
||||
if (opcode == Op_CompressV) {
|
||||
switch(bt) {
|
||||
case T_BYTE:
|
||||
evpcompressb(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_CHAR:
|
||||
case T_SHORT:
|
||||
evpcompressw(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_INT:
|
||||
evpcompressd(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_FLOAT:
|
||||
evcompressps(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_LONG:
|
||||
evpcompressq(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_DOUBLE:
|
||||
evcompresspd(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
default:
|
||||
fatal("Unsupported type %s", type2name(bt));
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
assert(opcode == Op_ExpandV, "");
|
||||
switch(bt) {
|
||||
case T_BYTE:
|
||||
evpexpandb(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_CHAR:
|
||||
case T_SHORT:
|
||||
evpexpandw(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_INT:
|
||||
evpexpandd(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_FLOAT:
|
||||
evexpandps(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_LONG:
|
||||
evpexpandq(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_DOUBLE:
|
||||
evexpandpd(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
default:
|
||||
fatal("Unsupported type %s", type2name(bt));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||
@ -4559,6 +4626,34 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
|
||||
}
|
||||
}
|
||||
|
||||
// Broadcasts the immediate imm32 into every lane of dst, where the lane width
// is the element size of bt.
//   bt      - element type; only its size in bytes (1, 2, 4 or 8) is used to
//             pick the broadcast instruction
//   dst     - destination vector register
//   imm32   - immediate value to replicate
//   rtmp    - scratch general purpose register used to materialize imm32 (clobbered)
//   vec_enc - vector length encoding forwarded to the broadcast instruction
//
// Two strategies are used:
//   * When the required AVX-512 features are available (and, for 8-byte lanes,
//     only on 64-bit builds) the value is broadcast directly from rtmp.
//   * Otherwise the value is first moved into dst and then broadcast from dst.
// Any other lane size is a fatal error.
void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc) {
  int lane_size = type2aelembytes(bt);
  bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  if ((is_LP64 || lane_size < 8) &&
      ((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) ||
       (is_subword_type(bt) && VM_Version::supports_avx512vlbw()))) {
    movptr(rtmp, imm32);
    switch(lane_size) {
      case 1 : evpbroadcastb(dst, rtmp, vec_enc); break;
      case 2 : evpbroadcastw(dst, rtmp, vec_enc); break;
      case 4 : evpbroadcastd(dst, rtmp, vec_enc); break;
      case 8 : evpbroadcastq(dst, rtmp, vec_enc); break;
      // The default label is required: without it the fatal() below is
      // unreachable and an unsupported lane size would silently emit nothing.
      default :
        fatal("Unsupported lane size %d", lane_size);
        break;
    }
  } else {
    movptr(rtmp, imm32);
    LP64_ONLY(movq(dst, rtmp)) NOT_LP64(movdl(dst, rtmp));
    switch(lane_size) {
      case 1 : vpbroadcastb(dst, dst, vec_enc); break;
      case 2 : vpbroadcastw(dst, dst, vec_enc); break;
      case 4 : vpbroadcastd(dst, dst, vec_enc); break;
      case 8 : vpbroadcastq(dst, dst, vec_enc); break;
      // See above: the label makes the fatal() reachable.
      default :
        fatal("Unsupported lane size %d", lane_size);
        break;
    }
  }
}
|
||||
|
||||
//
|
||||
// Following is lookup table based popcount computation algorithm:-
|
||||
@ -4589,62 +4684,98 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in
|
||||
// f. Perform step e. for upper 128bit vector lane.
|
||||
// g. Pack the bitset count of quadwords back to double word.
|
||||
// h. Unpacking and packing operations are not needed for 64bit vector lane.
|
||||
|
||||
// Per-byte population count using a nibble lookup table.
//   dst     - receives the bit count of every byte of src
//   src     - input vector; it is read after dst has been written, so dst
//             must not be the same register as src
//   xtmp1   - scratch vector register (clobbered)
//   xtmp2   - scratch vector register (clobbered; holds the lookup table)
//   rtmp    - scratch general purpose register (clobbered)
//   vec_enc - vector length encoding
void C2_MacroAssembler::vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                             XMMRegister xtmp2, Register rtmp, int vec_enc) {
  assert((vec_enc == Assembler::AVX_512bit && VM_Version::supports_avx512bw()) || VM_Version::supports_avx2(), "");

  // xtmp1 = 0x0F in every byte: the nibble mask.
  vbroadcast(T_INT, xtmp1, 0x0F0F0F0F, rtmp, vec_enc);

  // dst = upper nibble of every byte of src.
  vpsrlw(dst, src, 4, vec_enc);
  vpand(dst, dst, xtmp1, vec_enc);

  // xtmp1 = lower nibble of every byte of src.
  vpand(xtmp1, src, xtmp1, vec_enc);

  // Translate both nibble vectors through the popcount lookup table.
  vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp, vec_enc);
  vpshufb(xtmp1, xtmp2, xtmp1, vec_enc);
  vpshufb(dst, xtmp2, dst, vec_enc);

  // Byte count = count(upper nibble) + count(lower nibble).
  vpaddb(dst, dst, xtmp1, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||
int vec_enc) {
|
||||
if (VM_Version::supports_avx512_vpopcntdq()) {
|
||||
vpopcntd(dst, src, vec_enc);
|
||||
} else {
|
||||
assert((vec_enc == Assembler::AVX_512bit && VM_Version::supports_avx512bw()) || VM_Version::supports_avx2(), "");
|
||||
movl(rtmp, 0x0F0F0F0F);
|
||||
movdl(xtmp1, rtmp);
|
||||
vpbroadcastd(xtmp1, xtmp1, vec_enc);
|
||||
if (Assembler::AVX_512bit == vec_enc) {
|
||||
evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), false, vec_enc, rtmp);
|
||||
} else {
|
||||
vmovdqu(xtmp2, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), rtmp);
|
||||
}
|
||||
vpand(xtmp3, src, xtmp1, vec_enc);
|
||||
vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
|
||||
vpsrlw(dst, src, 4, vec_enc);
|
||||
vpand(dst, dst, xtmp1, vec_enc);
|
||||
vpshufb(dst, xtmp2, dst, vec_enc);
|
||||
vpaddb(xtmp3, dst, xtmp3, vec_enc);
|
||||
vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
|
||||
vpunpckhdq(dst, xtmp3, xtmp1, vec_enc);
|
||||
vpsadbw(dst, dst, xtmp1, vec_enc);
|
||||
vpunpckldq(xtmp2, xtmp3, xtmp1, vec_enc);
|
||||
vpsadbw(xtmp2, xtmp2, xtmp1, vec_enc);
|
||||
vpackuswb(dst, xtmp2, dst, vec_enc);
|
||||
}
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc) {
|
||||
vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);
|
||||
// Following code is as per steps e,f,g and h of above algorithm.
|
||||
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
|
||||
vpunpckhdq(dst, xtmp1, xtmp2, vec_enc);
|
||||
vpsadbw(dst, dst, xtmp2, vec_enc);
|
||||
vpunpckldq(xtmp1, xtmp1, xtmp2, vec_enc);
|
||||
vpsadbw(xtmp1, xtmp1, xtmp2, vec_enc);
|
||||
vpackuswb(dst, xtmp1, dst, vec_enc);
|
||||
}
|
||||
|
||||
// Per-word (16-bit) population count built on top of the per-byte count.
//   dst     - receives the bit count of every word of src
//   src     - input vector
//   xtmp1   - scratch vector register (clobbered; holds the per-byte counts)
//   xtmp2   - scratch vector register (clobbered)
//   rtmp    - scratch general purpose register (clobbered)
//   vec_enc - vector length encoding
void C2_MacroAssembler::vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                              XMMRegister xtmp2, Register rtmp, int vec_enc) {
  // Per-byte counts go to xtmp1; dst serves as the helper's first scratch register.
  vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);

  // Sum the two byte counts of each word:
  //   dst   = count of the upper byte (shifted down into the low byte)
  //   xtmp1 = count of the lower byte (upper byte masked off)
  vbroadcast(T_INT, xtmp2, 0x00FF00FF, rtmp, vec_enc);
  vpsrlw(dst, xtmp1, 8, vec_enc);
  vpand(xtmp1, xtmp1, xtmp2, vec_enc);
  vpaddw(dst, dst, xtmp1, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||
int vec_enc) {
|
||||
if (VM_Version::supports_avx512_vpopcntdq()) {
|
||||
vpopcntq(dst, src, vec_enc);
|
||||
} else if (vec_enc == Assembler::AVX_512bit) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
movl(rtmp, 0x0F0F0F0F);
|
||||
movdl(xtmp1, rtmp);
|
||||
vpbroadcastd(xtmp1, xtmp1, vec_enc);
|
||||
evmovdqul(xtmp2, k0, ExternalAddress(StubRoutines::x86::vector_popcount_lut()), true, vec_enc, rtmp);
|
||||
vpandq(xtmp3, src, xtmp1, vec_enc);
|
||||
vpshufb(xtmp3, xtmp2, xtmp3, vec_enc);
|
||||
vpsrlw(dst, src, 4, vec_enc);
|
||||
vpandq(dst, dst, xtmp1, vec_enc);
|
||||
vpshufb(dst, xtmp2, dst, vec_enc);
|
||||
vpaddb(xtmp3, dst, xtmp3, vec_enc);
|
||||
vpxorq(xtmp1, xtmp1, xtmp1, vec_enc);
|
||||
vpsadbw(dst, xtmp3, xtmp1, vec_enc);
|
||||
} else {
|
||||
// We do not see any performance benefit of running
|
||||
// above instruction sequence on 256 bit vector which
|
||||
// can operate over maximum 4 long elements.
|
||||
ShouldNotReachHere();
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc) {
|
||||
vector_popcount_byte(xtmp1, src, dst, xtmp2, rtmp, vec_enc);
|
||||
vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
|
||||
vpsadbw(dst, xtmp1, xtmp2, vec_enc);
|
||||
}
|
||||
|
||||
// Dispatches a vector population count to the routine matching the element type.
//   bt      - element type; T_CHAR is handled like T_SHORT and T_BOOLEAN like
//             T_BYTE, any other unsupported type is fatal
//   dst     - destination vector register
//   src     - source vector register
//   xtmp1   - scratch vector register forwarded to the selected routine
//   xtmp2   - scratch vector register forwarded to the selected routine
//   rtmp    - scratch general purpose register forwarded to the selected routine
//   vec_enc - vector length encoding
void C2_MacroAssembler::vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                 XMMRegister xtmp2, Register rtmp, int vec_enc) {
  switch (bt) {
    case T_LONG:
      vector_popcount_long(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
      return;
    case T_INT:
      vector_popcount_int(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
      return;
    case T_CHAR:
    case T_SHORT:
      vector_popcount_short(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
      return;
    case T_BYTE:
    case T_BOOLEAN:
      vector_popcount_byte(dst, src, xtmp1, xtmp2, rtmp, vec_enc);
      return;
    default:
      fatal("Unsupported type %s", type2name(bt));
      return;
  }
}
|
||||
|
||||
void C2_MacroAssembler::vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
|
||||
KRegister mask, bool merge, int vec_enc) {
|
||||
assert(VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit, "");
|
||||
switch(bt) {
|
||||
case T_LONG:
|
||||
assert(VM_Version::supports_avx512_vpopcntdq(), "");
|
||||
evpopcntq(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_INT:
|
||||
assert(VM_Version::supports_avx512_vpopcntdq(), "");
|
||||
evpopcntd(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_CHAR:
|
||||
case T_SHORT:
|
||||
assert(VM_Version::supports_avx512_bitalg(), "");
|
||||
evpopcntw(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
case T_BYTE:
|
||||
case T_BOOLEAN:
|
||||
assert(VM_Version::supports_avx512_bitalg(), "");
|
||||
evpopcntb(dst, mask, src, merge, vec_enc);
|
||||
break;
|
||||
default:
|
||||
fatal("Unsupported type %s", type2name(bt));
|
||||
break;
|
||||
}
|
||||
evpmovqd(dst, dst, vec_enc);
|
||||
}
|
||||
|
||||
#ifndef _LP64
|
||||
@ -4655,6 +4786,374 @@ void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src,
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bit reversal algorithm first reverses the bits of each byte followed by
|
||||
// a byte level reversal for multi-byte primitive types (short/int/long).
|
||||
// Algorithm performs a lookup table access to get reverse bit sequence
|
||||
// corresponding to a 4 bit value. Thus a reverse bit sequence for a byte
|
||||
// is obtained by swapping the reverse bit sequences of upper and lower
|
||||
// nibble of a byte.
|
||||
void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                           XMMRegister xtmp2, Register rtmp, int vec_enc) {
  // Three code shapes are emitted depending on CPU features and vector size:
  //   1. AVX512VL+BW : nibble lookup table using the EVEX logical forms (vpandq/vporq).
  //   2. 512-bit vectors without AVX512VL+BW : pure shift/mask bit swapping.
  //   3. everything else : the same nibble lookup table using vpand/vpor.
  // In every shape the bits inside each byte are reversed first and the bytes
  // of each bt-sized element are reversed last.
  // NOTE: src is read again after dst has been written, so dst is expected to be
  // a different register than src.

  if (VM_Version::supports_avx512vlbw()) {
    // xtmp1 = reverse-bit lookup table, xtmp2 = 0x0F in every byte.
    vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
    vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);

    // Lower nibble of each byte: look up its reversed form and move it into
    // the upper nibble position.
    vpandq(dst, xtmp2, src, vec_enc);
    vpshufb(dst, xtmp1, dst, vec_enc);
    vpsllq(dst, dst, 4, vec_enc);

    // Upper nibble of each byte: isolate it, bring it down to the lower nibble
    // position and look up its reversed form.
    vpandn(xtmp2, xtmp2, src, vec_enc);
    vpsrlq(xtmp2, xtmp2, 4, vec_enc);
    vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);

    // OR both halves together: every byte now holds its own bit-reversed value.
    vporq(xtmp2, dst, xtmp2, vec_enc);

    // Finish with a byte reversal inside each element.
    vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
    return;
  }

  if (vec_enc == Assembler::AVX_512bit) {
    // Shift based bit reversal.
    assert(bt == T_LONG || bt == T_INT, "");

    // Exchange the two nibbles of each byte.
    vector_swap_nbits(4, 0x0F0F0F0F, xtmp1, src, xtmp2, rtmp, vec_enc);

    // Exchange the two bit pairs of each nibble.
    vector_swap_nbits(2, 0x33333333, dst, xtmp1, xtmp2, rtmp, vec_enc);

    // Exchange neighbouring bits.
    evmovdqul(xtmp1, k0, dst, true, vec_enc);
    vector_swap_nbits(1, 0x55555555, dst, xtmp1, xtmp2, rtmp, vec_enc);

    // Finish with a byte reversal inside each element.
    evmovdqul(xtmp1, k0, dst, true, vec_enc);
    vector_reverse_byte64(bt, dst, xtmp1, xtmp1, xtmp2, rtmp, vec_enc);
    return;
  }

  // xtmp1 = reverse-bit lookup table, xtmp2 = 0x0F in every byte.
  vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), rtmp, vec_enc);
  vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);

  // Lower nibble of each byte: look up its reversed form and move it into
  // the upper nibble position.
  vpand(dst, xtmp2, src, vec_enc);
  vpshufb(dst, xtmp1, dst, vec_enc);
  vpsllq(dst, dst, 4, vec_enc);

  // Upper nibble of each byte: isolate it, bring it down to the lower nibble
  // position and look up its reversed form.
  vpandn(xtmp2, xtmp2, src, vec_enc);
  vpsrlq(xtmp2, xtmp2, 4, vec_enc);
  vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);

  // OR both halves together: every byte now holds its own bit-reversed value.
  vpor(xtmp2, dst, xtmp2, vec_enc);

  // Finish with a byte reversal inside each element.
  vector_reverse_byte(bt, dst, xtmp2, rtmp, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src,
                                                XMMRegister xtmp, AddressLiteral mask, Register rtmp, int vec_enc) {
  // Bit reversal built on the Galois field affine instruction; the approach is
  // described in
  // http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
  assert(VM_Version::supports_gfni(), "");

  // Replicate the 64-bit value found at 'mask' into every quadword of xtmp
  // (rtmp is handed to vpbroadcastq as its scratch register).
  vpbroadcastq(xtmp, mask, vec_enc, rtmp);

  // Affine transform of src with xtmp as the matrix operand and a zero immediate.
  vgf2p8affineqb(xtmp, src, xtmp, 0, vec_enc);

  // Finally reverse the bytes inside each bt-sized element.
  vector_reverse_byte(bt, dst, xtmp, rtmp, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
|
||||
XMMRegister xtmp1, Register rtmp, int vec_enc) {
|
||||
vbroadcast(T_INT, xtmp1, bitmask, rtmp, vec_enc);
|
||||
vpandq(dst, xtmp1, src, vec_enc);
|
||||
vpsllq(dst, dst, nbits, vec_enc);
|
||||
vpandn(xtmp1, xtmp1, src, vec_enc);
|
||||
vpsrlq(xtmp1, xtmp1, nbits, vec_enc);
|
||||
vporq(dst, dst, xtmp1, vec_enc);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                              XMMRegister xtmp2, Register rtmp, int vec_enc) {
  // Rotate/shift based byte reversal of every bt-sized element; uses EVEX
  // rotates and vector_swap_nbits instead of a byte shuffle.
  assert(VM_Version::supports_evex(), "");

  if (bt == T_LONG) {
    // Swap the two doublewords of each quadword, then the two words of each
    // doubleword, then the two bytes of each word.
    evprorq(xtmp1, k0, src, 32, true, vec_enc);
    evprord(xtmp1, k0, xtmp1, 16, true, vec_enc);
    vector_swap_nbits(8, 0x00FF00FF, dst, xtmp1, xtmp2, rtmp, vec_enc);
  } else if (bt == T_INT) {
    // Swap the two words of each doubleword, then the two bytes of each word.
    evprord(xtmp1, k0, src, 16, true, vec_enc);
    vector_swap_nbits(8, 0x00FF00FF, dst, xtmp1, xtmp2, rtmp, vec_enc);
  } else if (bt == T_SHORT) {
    // Only the two bytes of each word need to be swapped.
    vector_swap_nbits(8, 0x00FF00FF, dst, src, xtmp2, rtmp, vec_enc);
  } else if (bt == T_BYTE) {
    // Single-byte elements: plain copy.
    evmovdquq(dst, k0, src, true, vec_enc);
  } else {
    fatal("Unsupported type %s", type2name(bt));
  }
}
|
||||
|
||||
void C2_MacroAssembler::vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc) {
  // Reverses the byte order inside every bt-sized element of src into dst.
  // rtmp is passed as scratch register to the shuffle-mask loads.
  // NOTE: for multi-byte types dst receives the shuffle mask before src is read,
  // so dst must not be the same register as src.

  if (bt == T_BYTE) {
    // Nothing to reverse for single-byte elements: emit a register copy.
    const bool use_evex_move = VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit;
    if (use_evex_move) {
      evmovdquq(dst, k0, src, true, vec_enc);
    } else {
      vmovdqu(dst, src);
    }
    return;
  }

  // Load the pre-computed shuffle indices matching the element size ...
  if (bt == T_LONG) {
    vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_long()), rtmp, vec_enc);
  } else if (bt == T_INT) {
    vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_int()), rtmp, vec_enc);
  } else if (bt == T_SHORT) {
    vmovdqu(dst, ExternalAddress(StubRoutines::x86::vector_reverse_byte_perm_mask_short()), rtmp, vec_enc);
  } else {
    fatal("Unsupported type %s", type2name(bt));
  }

  // ... and shuffle the bytes of src with them.
  vpshufb(dst, src, dst, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                                        XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                        KRegister ktmp, Register rtmp, bool merge, int vec_enc) {
  // Per-lane leading zero count for integral element types on EVEX targets.
  // T_LONG/T_INT map directly onto evplzcntq/evplzcntd, T_SHORT is widened to
  // int lanes, T_BYTE goes through a nibble lookup table.
  assert(is_integral_type(bt), "");
  assert(VM_Version::supports_avx512vl() || vec_enc == Assembler::AVX_512bit, "");
  assert(VM_Version::supports_avx512cd(), "");

  switch (bt) {
    case T_LONG: {
      evplzcntq(dst, ktmp, src, merge, vec_enc);
      break;
    }
    case T_INT: {
      evplzcntd(dst, ktmp, src, merge, vec_enc);
      break;
    }
    case T_SHORT: {
      // xtmp1 = all ones.
      vpternlogd(xtmp1, 0xff, xtmp1, xtmp1, vec_enc);
      // Interleave all-ones words with the source words, count leading zeros
      // on the resulting doublewords (low-half words first, then high-half words).
      vpunpcklwd(xtmp2, xtmp1, src, vec_enc);
      evplzcntd(xtmp2, ktmp, xtmp2, merge, vec_enc);
      vpunpckhwd(dst, xtmp1, src, vec_enc);
      evplzcntd(dst, ktmp, dst, merge, vec_enc);
      // Narrow both sets of doubleword counts back to words.
      vpackusdw(dst, xtmp2, dst, vec_enc);
      break;
    }
    case T_BYTE: {
      // T1 = leading zero count of the 4 LSB bits of each byte (table lookup).
      // T2 = leading zero count of the 4 MSB bits of each byte (table lookup).
      // Result = T2, plus T1 where the 4 MSB bits are all zero.
      assert(VM_Version::supports_avx512bw(), "");
      evmovdquq(xtmp1, ExternalAddress(StubRoutines::x86::vector_count_leading_zeros_lut()), vec_enc, rtmp);
      vbroadcast(T_INT, dst, 0x0F0F0F0F, rtmp, vec_enc);
      // xtmp2 = T1
      vpand(xtmp2, dst, src, vec_enc);
      vpshufb(xtmp2, xtmp1, xtmp2, vec_enc);
      // xtmp3 = upper nibbles, dst = T2
      vpsrlw(xtmp3, src, 4, vec_enc);
      vpand(xtmp3, dst, xtmp3, vec_enc);
      vpshufb(dst, xtmp1, xtmp3, vec_enc);
      // ktmp = lanes whose upper nibble is zero; add T1 only in those lanes.
      vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
      evpcmpeqb(ktmp, xtmp1, xtmp3, vec_enc);
      evpaddb(dst, ktmp, dst, xtmp2, true, vec_enc);
      break;
    }
    default: {
      fatal("Unsupported type %s", type2name(bt));
      break;
    }
  }
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
  // Per-byte leading zero count using a nibble lookup table.
  // On exit xtmp1 holds all zeros; the short/long variants rely on that.

  // xtmp1 = lookup table, xtmp2 = 0x0F in every byte.
  vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_count_leading_zeros_lut()), rtmp);
  vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);

  // T1 (in dst) = leading zero count of the 4 LSB bits of each byte.
  vpand(dst, xtmp2, src, vec_enc);
  vpshufb(dst, xtmp1, dst, vec_enc);

  // T2 (in xtmp2) = leading zero count of the 4 MSB bits of each byte;
  // xtmp3 keeps the isolated upper nibbles.
  vpsrlw(xtmp3, src, 4, vec_enc);
  vpand(xtmp3, xtmp2, xtmp3, vec_enc);
  vpshufb(xtmp2, xtmp1, xtmp3, vec_enc);

  // xtmp3 = byte mask of lanes whose upper nibble is zero.
  vpxor(xtmp1, xtmp1, xtmp1, vec_enc);
  vpcmpeqb(xtmp3, xtmp1, xtmp3, vec_enc);

  // Result is T1 + T2 in those lanes and T2 everywhere else.
  vpaddb(dst, dst, xtmp2, vec_enc);
  vpblendvb(dst, xtmp2, dst, xtmp3, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                             XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
  // Start from the per-byte counts, then combine the two bytes of every word:
  // the word's count is the upper byte's count, plus the lower byte's count
  // when the upper byte is zero.
  vector_count_leading_zeros_byte_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);

  // xtmp3 = word mask of lanes whose upper byte is zero
  // (xtmp1 was left all zeros by vector_count_leading_zeros_byte_avx).
  vpsrlw(xtmp3, src, 8, vec_enc);
  vpcmpeqw(xtmp3, xtmp1, xtmp3, vec_enc);

  // xtmp2 = lower-byte count moved into the upper byte, added to the counts in dst.
  vpsllw(xtmp2, dst, 8, vec_enc);
  vpaddw(xtmp2, xtmp2, dst, vec_enc);

  // Take the summed value only in the masked lanes, then move the upper byte
  // down to obtain the count of the word.
  vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
  vpsrlw(dst, dst, 8, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                           XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc) {
  // Per-lane leading zero count of 32-bit integers without AVX512CD.
  //
  // Converting a positive int to float stores the index of its highest set bit
  // in the biased exponent (biased_exp - 127), hence
  //   LZCNT = 32 - ((biased_exp - 127) + 1)
  // Zero and negative lanes are patched explicitly (results 32 and 0).
  //
  // The conversion rounds to nearest, so a value such as 0x01FFFFFF would be
  // rounded up to 2^25 and report one leading zero too few. To avoid that, the
  // bit directly below the highest set bit is cleared before converting: the
  // highest set bit (and therefore LZCNT) is unchanged, but the value is now
  // below 1.5 * 2^k and can never round up to 2^(k+1).
  //
  // Clobbers xtmp1, xtmp2 and xtmp3. src is read again after dst has been
  // written, so dst must be a different register than src.

  // Broadcast 0xFF.
  vpcmpeqd(xtmp1, xtmp1, xtmp1, vec_enc);
  vpsrld(xtmp1, xtmp1, 24, vec_enc);

  // xtmp2 = src & ~(src >>> 1): drop the bit right below the highest set bit.
  vpsrld(xtmp2, src, 1, vec_enc);
  vpandn(xtmp2, xtmp2, src, vec_enc);

  // Extract biased exponent (the 0xFF mask discards the float sign bit).
  vcvtdq2ps(dst, xtmp2, vec_enc);
  vpsrld(dst, dst, 23, vec_enc);
  vpand(dst, dst, xtmp1, vec_enc);

  // Broadcast 127.
  vpsrld(xtmp1, xtmp1, 1, vec_enc);
  // Exponent = biased_exp - 127
  vpsubd(dst, dst, xtmp1, vec_enc);

  // Exponent = Exponent + 1 (xtmp3 = 127 >>> 6 = 1 in every lane).
  vpsrld(xtmp3, xtmp1, 6, vec_enc);
  vpaddd(dst, dst, xtmp3, vec_enc);

  // Replace -ve exponent with zero, exponent is -ve when src
  // lane contains a zero value.
  vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
  vblendvps(dst, dst, xtmp2, dst, vec_enc);

  // Rematerialize broadcast 32 (1 << 5).
  vpslld(xtmp1, xtmp3, 5, vec_enc);
  // LZCNT = 32 - exponent
  vpsubd(dst, xtmp1, dst, vec_enc);

  // Replace LZCNT with 0 if the source lane value is less than zero
  // (xtmp2 still holds all zeros; the blend is keyed on the sign bit of src).
  vblendvps(dst, dst, xtmp2, src, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                            XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
  // Start from the per-word counts and widen twice (word -> doubleword ->
  // quadword). At each step the count of the wider lane is the upper half's
  // count, plus the lower half's count when the upper half is zero.
  vector_count_leading_zeros_short_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);

  // --- words -> doublewords ---
  // xtmp3 = doubleword mask of lanes whose upper word is zero
  // (xtmp1 was left all zeros by vector_count_leading_zeros_byte_avx).
  vpsrld(xtmp3, src, 16, vec_enc);
  vpcmpeqd(xtmp3, xtmp1, xtmp3, vec_enc);
  // xtmp2 = lower-word count moved into the upper word, added to the counts in dst.
  vpslld(xtmp2, dst, 16, vec_enc);
  vpaddd(xtmp2, xtmp2, dst, vec_enc);
  // Take the sum only in masked lanes, then move the upper word down.
  vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
  vpsrld(dst, dst, 16, vec_enc);

  // --- doublewords -> quadwords ---
  // xtmp3 = quadword mask of lanes whose upper doubleword is zero.
  vpsrlq(xtmp3, src, 32, vec_enc);
  vpcmpeqq(xtmp3, xtmp1, xtmp3, vec_enc);
  // xtmp2 = lower-doubleword count moved into the upper doubleword, added to dst.
  vpsllq(xtmp2, dst, 32, vec_enc);
  vpaddq(xtmp2, xtmp2, dst, vec_enc);
  // Take the sum only in masked lanes, then move the upper doubleword down.
  vpblendvb(dst, dst, xtmp2, xtmp3, vec_enc);
  vpsrlq(dst, dst, 32, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src,
                                                       XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                       Register rtmp, int vec_enc) {
  // Dispatches to the element-size specific leading-zero-count emitter for
  // vectors narrower than 512 bits. The T_INT variant takes no rtmp.
  assert(is_integral_type(bt), "unexpected type");
  assert(vec_enc < Assembler::AVX_512bit, "");

  if (bt == T_LONG) {
    vector_count_leading_zeros_long_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
  } else if (bt == T_INT) {
    vector_count_leading_zeros_int_avx(dst, src, xtmp1, xtmp2, xtmp3, vec_enc);
  } else if (bt == T_SHORT) {
    vector_count_leading_zeros_short_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
  } else if (bt == T_BYTE) {
    vector_count_leading_zeros_byte_avx(dst, src, xtmp1, xtmp2, xtmp3, rtmp, vec_enc);
  } else {
    fatal("Unsupported type %s", type2name(bt));
  }
}
|
||||
|
||||
void C2_MacroAssembler::vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc) {
  // Element-type driven selection of the packed integer subtract:
  // byte, short, int and long map to vpsubb, vpsubw, vpsubd and vpsubq.
  if (bt == T_BYTE) {
    vpsubb(dst, src1, src2, vec_enc);
  } else if (bt == T_SHORT) {
    vpsubw(dst, src1, src2, vec_enc);
  } else if (bt == T_INT) {
    vpsubd(dst, src1, src2, vec_enc);
  } else if (bt == T_LONG) {
    vpsubq(dst, src1, src2, vec_enc);
  } else {
    fatal("Unsupported type %s", type2name(bt));
  }
}
|
||||
|
||||
// Trailing zero count computation is based on leading zero count operation as per
|
||||
// following equation. All AVX3 targets support AVX512CD feature which offers
|
||||
// direct vector instruction to compute leading zero count.
|
||||
// CTZ = PRIM_TYPE_WIDTH - CLZ((x - 1) & ~x)
|
||||
void C2_MacroAssembler::vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
                                                         XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
                                                         XMMRegister xtmp4, KRegister ktmp, Register rtmp, int vec_enc) {
  // Trailing zero count expressed through the leading zero count:
  //   CTZ = element width in bits - CLZ((x - 1) & ~x)
  assert(is_integral_type(bt), "");

  // xtmp4 = -1 in every lane.
  vpternlogd(xtmp4, 0xff, xtmp4, xtmp4, vec_enc);
  // xtmp4 = src - 1 (adding -1 per element).
  vpadd(bt, xtmp4, xtmp4, src, vec_enc);
  // xtmp4 = (src - 1) & ~src.
  vpternlogd(xtmp4, 0x40, xtmp4, src, vec_enc);

  // dst = CLZ(xtmp4).
  vector_count_leading_zeros_evex(bt, dst, xtmp4, xtmp1, xtmp2, xtmp3, ktmp, rtmp, true, vec_enc);

  // dst = element width in bits - dst.
  vbroadcast(bt, xtmp4, 8 * type2aelembytes(bt), rtmp, vec_enc);
  vpsub(bt, dst, xtmp4, dst, vec_enc);
}
|
||||
|
||||
// Trailing zero count computation for AVX2 targets is based on popcount operation as per following equation
|
||||
// CTZ = PRIM_TYPE_WIDTH - POPC(x | -x)
|
||||
void C2_MacroAssembler::vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
                                                        XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc) {
  // Trailing zero count expressed through the population count:
  //   CTZ = element width in bits - POPC(x | -x)
  assert(is_integral_type(bt), "");

  // xtmp3 = 0
  vpxor(xtmp3 , xtmp3, xtmp3, vec_enc);
  // xtmp3 = 0 - src, i.e. -src per element.
  vpsub(bt, xtmp3, xtmp3, src, vec_enc);
  // xtmp3 = -src | src
  vpor(xtmp3, xtmp3, src, vec_enc);

  // dst = POPC(xtmp3); xtmp1 and xtmp2 serve as temporaries of the popcount.
  vector_popcount_integral(bt, dst, xtmp3, xtmp1, xtmp2, rtmp, vec_enc);

  // dst = element width in bits - dst.
  vbroadcast(bt, xtmp1, 8 * type2aelembytes(bt), rtmp, vec_enc);
  vpsub(bt, dst, xtmp1, dst, vec_enc);
}
|
||||
|
||||
void C2_MacroAssembler::udivI(Register rax, Register divisor, Register rdx) {
|
||||
Label done;
|
||||
Label neg_divisor_fastpath;
|
||||
@ -4817,4 +5316,3 @@ void C2_MacroAssembler::udivmodL(Register rax, Register divisor, Register rdx, R
|
||||
bind(done);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -88,6 +88,11 @@ public:
|
||||
XMMRegister zero, XMMRegister one,
|
||||
Register scratch);
|
||||
|
||||
void vector_compress_expand(int opcode, XMMRegister dst, XMMRegister src, KRegister mask,
|
||||
bool merge, BasicType bt, int vec_enc);
|
||||
|
||||
void vector_mask_compress(KRegister dst, KRegister src, Register rtmp1, Register rtmp2, int mask_len);
|
||||
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
|
||||
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
@ -137,7 +142,6 @@ public:
|
||||
#ifdef _LP64
|
||||
void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc);
|
||||
#endif
|
||||
void vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vlen_enc);
|
||||
|
||||
// blend
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
|
||||
@ -341,34 +345,89 @@ public:
|
||||
AddressLiteral new_mxcsr, Register scratch, int vec_enc);
|
||||
#endif
|
||||
|
||||
void udivI(Register rax, Register divisor, Register rdx);
|
||||
void umodI(Register rax, Register divisor, Register rdx);
|
||||
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
|
||||
#ifdef _LP64
|
||||
void udivL(Register rax, Register divisor, Register rdx);
|
||||
void umodL(Register rax, Register divisor, Register rdx);
|
||||
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
|
||||
bool merge, BasicType bt, int vlen_enc);
|
||||
|
||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
|
||||
bool merge, BasicType bt, int vlen_enc);
|
||||
|
||||
void vector_reverse_bit(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp,
|
||||
AddressLiteral mask, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_reverse_byte(BasicType bt, XMMRegister dst, XMMRegister src, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_short(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_byte(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_integral(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_popcount_integral_evex(BasicType bt, XMMRegister dst, XMMRegister src,
|
||||
KRegister mask, bool merge, int vec_enc);
|
||||
|
||||
void vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src,
|
||||
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3,
|
||||
KRegister ktmp, Register rtmp, bool merge, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_byte_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_short_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_int_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_long_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_count_leading_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
|
||||
|
||||
void vpadd(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
|
||||
|
||||
void vpsub(BasicType bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, int vec_enc);
|
||||
|
||||
void vector_count_trailing_zeros_evex(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, KRegister ktmp,
|
||||
Register rtmp, int vec_enc);
|
||||
|
||||
void vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
|
||||
XMMRegister xtmp1, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_count_trailing_zeros_avx(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, int vec_enc);
|
||||
|
||||
void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||
XMMRegister xtmp1, int vec_enc);
|
||||
|
||||
void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
|
||||
KRegister ktmp1, int vec_enc);
|
||||
|
||||
void udivI(Register rax, Register divisor, Register rdx);
|
||||
void umodI(Register rax, Register divisor, Register rdx);
|
||||
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
|
||||
#ifdef _LP64
|
||||
void udivL(Register rax, Register divisor, Register rdx);
|
||||
void umodL(Register rax, Register divisor, Register rdx);
|
||||
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||
int vec_enc);
|
||||
|
||||
void vector_popcount_long(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
|
||||
int vec_enc);
|
||||
|
||||
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
|
||||
@ -2577,8 +2577,9 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
|
||||
}
|
||||
|
||||
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) {
|
||||
assert(vector_len <= AVX_256bit, "AVX2 vector length");
|
||||
if (vector_len == AVX_256bit) {
|
||||
if (vector_len == AVX_512bit) {
|
||||
evmovdquq(dst, src, AVX_512bit, scratch_reg);
|
||||
} else if (vector_len == AVX_256bit) {
|
||||
vmovdqu(dst, src, scratch_reg);
|
||||
} else {
|
||||
movdqu(dst, src, scratch_reg);
|
||||
@ -3229,6 +3230,15 @@ void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_l
|
||||
Assembler::vpbroadcastw(dst, src, vector_len);
|
||||
}
|
||||
|
||||
void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  // Broadcast from an address literal. When the literal is not reachable,
  // its address is first materialized in rscratch and the broadcast reads
  // through that register.
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::vpbroadcastq(dst, Address(rscratch, 0), vector_len);
    return;
  }
  Assembler::vpbroadcastq(dst, as_Address(src), vector_len);
}
|
||||
|
||||
void MacroAssembler::vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
|
||||
if (reachable(src)) {
|
||||
Assembler::vbroadcastsd(dst, as_Address(src), vector_len);
|
||||
|
||||
@ -1347,6 +1347,11 @@ public:
|
||||
|
||||
using Assembler::vbroadcastsd;
|
||||
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
|
||||
void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
|
||||
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
|
||||
void vpbroadcastq(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
|
||||
|
||||
|
||||
|
||||
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
|
||||
@ -186,15 +186,29 @@
|
||||
// Returns pre-selection estimated size of a vector operation.
|
||||
static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) {
|
||||
switch(vopc) {
|
||||
default: return 0;
|
||||
case Op_PopCountVI: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
|
||||
case Op_PopCountVL: return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
|
||||
default:
|
||||
return 0;
|
||||
case Op_CountTrailingZerosV:
|
||||
case Op_CountLeadingZerosV:
|
||||
return VM_Version::supports_avx512cd() && (ety == T_INT || ety == T_LONG) ? 0 : 40;
|
||||
case Op_PopCountVI:
|
||||
if (is_subword_type(ety)) {
|
||||
return VM_Version::supports_avx512_bitalg() ? 0 : 50;
|
||||
} else {
|
||||
assert(ety == T_INT, "sanity"); // for documentation purposes
|
||||
return VM_Version::supports_avx512_vpopcntdq() ? 0 : 50;
|
||||
}
|
||||
case Op_PopCountVL:
|
||||
return VM_Version::supports_avx512_vpopcntdq() ? 0 : 40;
|
||||
case Op_ReverseV:
|
||||
return VM_Version::supports_gfni() ? 0 : 30;
|
||||
case Op_RoundVF: // fall through
|
||||
case Op_RoundVD: {
|
||||
return 30;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns pre-selection estimated size of a scalar operation.
|
||||
static int scalar_op_pre_select_sz_estimate(int vopc, BasicType ety) {
|
||||
switch(vopc) {
|
||||
|
||||
@ -588,6 +588,30 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_count_leading_zeros_lut(const char *stub_name) {
  // Emits the data stub installed as
  // StubRoutines::x86::_vector_count_leading_zeros_lut: the same four data
  // words, written four times back to back, starting at a 64-byte aligned
  // address. Returns the address of the first word.
  __ align64();
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address start = __ pc();

  const int lut_words[4] = { 0x02020304, 0x01010101, 0x00000000, 0x00000000 };
  for (int copy = 0; copy < 4; copy++) {
    for (int idx = 0; idx < 4; idx++) {
      __ emit_data(lut_words[idx], relocInfo::none, 0);
    }
  }
  return start;
}
|
||||
|
||||
|
||||
address generate_popcount_avx_lut(const char *stub_name) {
|
||||
__ align64();
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
@ -635,6 +659,98 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_vector_reverse_bit_lut(const char *stub_name) {
  // Emits the data stub installed as StubRoutines::x86::_vector_reverse_bit_lut:
  // the same four data words, written four times back to back, starting at a
  // CodeEntryAlignment aligned address. Returns the address of the first word.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address start = __ pc();

  const int lut_words[4] = { 0x0C040800, 0x0E060A02, 0x0D050901, 0x0F070B03 };
  for (int copy = 0; copy < 4; copy++) {
    for (int idx = 0; idx < 4; idx++) {
      __ emit_data(lut_words[idx], relocInfo::none, 0);
    }
  }
  return start;
}
|
||||
|
||||
address generate_vector_reverse_byte_perm_mask_long(const char *stub_name) {
  // Emits the data stub installed as
  // StubRoutines::x86::_vector_reverse_byte_perm_mask_long: the same four data
  // words, written four times back to back, starting at a CodeEntryAlignment
  // aligned address. Returns the address of the first word.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address start = __ pc();

  const int mask_words[4] = { 0x04050607, 0x00010203, 0x0C0D0E0F, 0x08090A0B };
  for (int copy = 0; copy < 4; copy++) {
    for (int idx = 0; idx < 4; idx++) {
      __ emit_data(mask_words[idx], relocInfo::none, 0);
    }
  }
  return start;
}
|
||||
|
||||
address generate_vector_reverse_byte_perm_mask_int(const char *stub_name) {
  // Emits the data stub installed as
  // StubRoutines::x86::_vector_reverse_byte_perm_mask_int: the same four data
  // words, written four times back to back, starting at a CodeEntryAlignment
  // aligned address. Returns the address of the first word.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address start = __ pc();

  const int mask_words[4] = { 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F };
  for (int copy = 0; copy < 4; copy++) {
    for (int idx = 0; idx < 4; idx++) {
      __ emit_data(mask_words[idx], relocInfo::none, 0);
    }
  }
  return start;
}
|
||||
|
||||
address generate_vector_reverse_byte_perm_mask_short(const char *stub_name) {
  // Emits the data stub installed as
  // StubRoutines::x86::_vector_reverse_byte_perm_mask_short: the same four data
  // words, written four times back to back, starting at a CodeEntryAlignment
  // aligned address. Returns the address of the first word.
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address start = __ pc();

  const int mask_words[4] = { 0x02030001, 0x06070405, 0x0A0B0809, 0x0E0F0C0D };
  for (int copy = 0; copy < 4; copy++) {
    for (int idx = 0; idx < 4; idx++) {
      __ emit_data(mask_words[idx], relocInfo::none, 0);
    }
  }
  return start;
}
|
||||
|
||||
address generate_vector_byte_shuffle_mask(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
@ -4090,8 +4206,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
|
||||
StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001);
|
||||
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
|
||||
StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut("count_leading_zeros_lut");
|
||||
StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut("reverse_bit_lut");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long("perm_mask_long");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int("perm_mask_int");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short("perm_mask_short");
|
||||
|
||||
if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
|
||||
if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
|
||||
// lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight.
|
||||
StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
|
||||
}
|
||||
|
||||
@ -807,6 +807,21 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte, 64-byte-aligned (align64) lookup table and returns its
// start address. The 16-byte pattern {0x0101010102020304, 0} is written four
// times. Read as little-endian bytes the pattern is
// 04 03 02 02 01 01 01 01 00 00 00 00 00 00 00 00, so entry i (0..15) equals
// the number of leading zero bits in the 4-bit value i.
// NOTE(review): presumably consumed as a per-128-bit-lane byte shuffle table
// by the vector count-leading-zeros code -- confirm at the use site.
address generate_count_leading_zeros_lut(const char *stub_name) {
|
||||
__ align64();
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0101010102020304, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0101010102020304, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0101010102020304, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0101010102020304, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_popcount_avx_lut(const char *stub_name) {
|
||||
__ align64();
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
@ -837,6 +852,66 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte lookup table (aligned to CodeEntryAlignment) and returns its
// start address. The 16-byte pattern {0x0E060A020C040800, 0x0F070B030D050901}
// is written four times. Read as little-endian bytes the pattern is
// 00 08 04 0C 02 0A 06 0E 01 09 05 0D 03 0B 07 0F, so entry i (0..15) is the
// 4-bit value i with its bit order reversed.
// NOTE(review): unlike the align64() tables in this file, this one relies on
// CodeEntryAlignment -- confirm that is sufficient for how the table is loaded.
address generate_vector_reverse_bit_lut(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0E060A020C040800, relocInfo::none);
|
||||
__ emit_data64(0x0F070B030D050901, relocInfo::none);
|
||||
__ emit_data64(0x0E060A020C040800, relocInfo::none);
|
||||
__ emit_data64(0x0F070B030D050901, relocInfo::none);
|
||||
__ emit_data64(0x0E060A020C040800, relocInfo::none);
|
||||
__ emit_data64(0x0F070B030D050901, relocInfo::none);
|
||||
__ emit_data64(0x0E060A020C040800, relocInfo::none);
|
||||
__ emit_data64(0x0F070B030D050901, relocInfo::none);
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte constant table (aligned to CodeEntryAlignment) and returns
// its start address. The 16-byte pattern
// {0x0001020304050607, 0x08090A0B0C0D0E0F} is written four times. Read as
// little-endian bytes the pattern is 07 06 05 04 03 02 01 00 0F 0E ... 08,
// i.e. the byte indices of each 8-byte group in reverse order -- presumably a
// byte-shuffle control that reverses the bytes of every 64-bit element
// (TODO confirm against the consumer of this mask).
address generate_vector_reverse_byte_perm_mask_long(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090A0B0C0D0E0F, relocInfo::none);
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte constant table (aligned to CodeEntryAlignment) and returns
// its start address. The 16-byte pattern
// {0x0405060700010203, 0x0C0D0E0F08090A0B} is written four times. Read as
// little-endian bytes the pattern is 03 02 01 00 07 06 05 04 0B 0A 09 08 ...,
// i.e. the byte indices of each 4-byte group in reverse order -- presumably a
// byte-shuffle control that reverses the bytes of every 32-bit element
// (TODO confirm against the consumer of this mask).
address generate_vector_reverse_byte_perm_mask_int(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0405060700010203, relocInfo::none);
|
||||
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
|
||||
__ emit_data64(0x0405060700010203, relocInfo::none);
|
||||
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
|
||||
__ emit_data64(0x0405060700010203, relocInfo::none);
|
||||
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
|
||||
__ emit_data64(0x0405060700010203, relocInfo::none);
|
||||
__ emit_data64(0x0C0D0E0F08090A0B, relocInfo::none);
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a 64-byte constant table (aligned to CodeEntryAlignment) and returns
// its start address. The 16-byte pattern
// {0x0607040502030001, 0x0E0F0C0D0A0B0809} is written four times. Read as
// little-endian bytes the pattern is 01 00 03 02 05 04 07 06 09 08 ...,
// i.e. byte indices swapped pairwise -- presumably a byte-shuffle control that
// reverses the bytes of every 16-bit element (TODO confirm against the
// consumer of this mask).
address generate_vector_reverse_byte_perm_mask_short(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0607040502030001, relocInfo::none);
|
||||
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
|
||||
__ emit_data64(0x0607040502030001, relocInfo::none);
|
||||
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
|
||||
__ emit_data64(0x0607040502030001, relocInfo::none);
|
||||
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
|
||||
__ emit_data64(0x0607040502030001, relocInfo::none);
|
||||
__ emit_data64(0x0E0F0C0D0A0B0809, relocInfo::none);
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_vector_byte_shuffle_mask(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
@ -7955,8 +8030,13 @@ address generate_avx_ghash_processBlocks() {
|
||||
StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
|
||||
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
|
||||
StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
|
||||
StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut("count_leading_zeros_lut");
|
||||
StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut("reverse_bit_lut");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long("perm_mask_long");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int("perm_mask_int");
|
||||
StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short("perm_mask_short");
|
||||
|
||||
if (UsePopCountInstruction && VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
|
||||
if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
|
||||
// lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight.
|
||||
StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut");
|
||||
}
|
||||
|
||||
@ -59,7 +59,12 @@ address StubRoutines::x86::_vector_double_sign_flip = NULL;
|
||||
// Out-of-class definitions of the StubRoutines::x86 vector constant-table
// addresses. Each starts out as NULL; the ones visible in the stub generator
// (e.g. _vector_iota_indices, _vector_reverse_bit_lut, the
// _vector_reverse_byte_perm_mask_* family, _vector_count_leading_zeros_lut)
// are assigned there, and _vector_popcount_lut is assigned only inside a
// CPU-feature-guarded branch, so it may legitimately stay NULL.
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
|
||||
address StubRoutines::x86::_vector_long_sign_mask = NULL;
|
||||
address StubRoutines::x86::_vector_iota_indices = NULL;
|
||||
address StubRoutines::x86::_vector_reverse_bit_lut = NULL;
|
||||
address StubRoutines::x86::_vector_reverse_byte_perm_mask_long = NULL;
|
||||
address StubRoutines::x86::_vector_reverse_byte_perm_mask_int = NULL;
|
||||
address StubRoutines::x86::_vector_reverse_byte_perm_mask_short = NULL;
|
||||
address StubRoutines::x86::_vector_popcount_lut = NULL;
|
||||
address StubRoutines::x86::_vector_count_leading_zeros_lut = NULL;
|
||||
address StubRoutines::x86::_vector_32_bit_mask = NULL;
|
||||
address StubRoutines::x86::_vector_64_bit_mask = NULL;
|
||||
#ifdef _LP64
|
||||
|
||||
@ -178,6 +178,11 @@ class x86 {
|
||||
static address _vector_long_shuffle_mask;
|
||||
static address _vector_iota_indices;
|
||||
static address _vector_popcount_lut;
|
||||
static address _vector_count_leading_zeros_lut;
|
||||
static address _vector_reverse_bit_lut;
|
||||
static address _vector_reverse_byte_perm_mask_long;
|
||||
static address _vector_reverse_byte_perm_mask_int;
|
||||
static address _vector_reverse_byte_perm_mask_short;
|
||||
#ifdef _LP64
|
||||
static juint _k256_W[];
|
||||
static address _k256_W_adr;
|
||||
@ -341,6 +346,26 @@ class x86 {
|
||||
return _vector_iota_indices;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_count_leading_zeros_lut (the table
// produced by generate_count_leading_zeros_lut once stubs are generated).
static address vector_count_leading_zeros_lut() {
|
||||
return _vector_count_leading_zeros_lut;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_reverse_bit_lut (the table produced by
// generate_vector_reverse_bit_lut once stubs are generated).
static address vector_reverse_bit_lut() {
|
||||
return _vector_reverse_bit_lut;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_reverse_byte_perm_mask_long (the table
// produced by generate_vector_reverse_byte_perm_mask_long).
static address vector_reverse_byte_perm_mask_long() {
|
||||
return _vector_reverse_byte_perm_mask_long;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_reverse_byte_perm_mask_int (the table
// produced by generate_vector_reverse_byte_perm_mask_int).
static address vector_reverse_byte_perm_mask_int() {
|
||||
return _vector_reverse_byte_perm_mask_int;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_reverse_byte_perm_mask_short (the table
// produced by generate_vector_reverse_byte_perm_mask_short).
static address vector_reverse_byte_perm_mask_short() {
|
||||
return _vector_reverse_byte_perm_mask_short;
|
||||
}
|
||||
|
||||
// Returns the address held in _vector_popcount_lut. The stub generator assigns
// this field only inside a CPU-feature-guarded branch (AVX2 present and
// AVX512_VPOPCNTDQ absent), so the result may be NULL on other configurations.
static address vector_popcount_lut() {
|
||||
return _vector_popcount_lut;
|
||||
}
|
||||
|
||||
@ -922,6 +922,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512_VNNI;
|
||||
_features &= ~CPU_AVX512_VBMI;
|
||||
_features &= ~CPU_AVX512_VBMI2;
|
||||
_features &= ~CPU_AVX512_BITALG;
|
||||
}
|
||||
|
||||
if (UseAVX < 2)
|
||||
@ -951,6 +952,8 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512_VBMI2;
|
||||
_features &= ~CPU_CLWB;
|
||||
_features &= ~CPU_FLUSHOPT;
|
||||
_features &= ~CPU_GFNI;
|
||||
_features &= ~CPU_AVX512_BITALG;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -370,10 +370,11 @@ protected:
|
||||
decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
|
||||
decl(HV, "hv", 46) /* Hypervisor instructions */ \
|
||||
decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */ \
|
||||
\
|
||||
decl(RDTSCP, "rdtscp", 48) /* RDTSCP instruction */ \
|
||||
decl(RDPID, "rdpid", 49) /* RDPID instruction */ \
|
||||
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */
|
||||
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */ \
|
||||
decl(GFNI, "gfni", 51) /* Vector GFNI instructions */ \
|
||||
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */
|
||||
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
|
||||
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
|
||||
@ -603,8 +604,12 @@ protected:
|
||||
result |= CPU_AVX512_VPCLMULQDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
|
||||
result |= CPU_AVX512_VAES;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
|
||||
result |= CPU_GFNI;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
|
||||
result |= CPU_AVX512_VNNI;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
|
||||
result |= CPU_AVX512_BITALG;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
|
||||
result |= CPU_AVX512_VBMI;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
|
||||
@ -918,7 +923,9 @@ public:
|
||||
// True when the CPU_AVX512_VPOPCNTDQ bit is set in _features.
static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
|
||||
// True when the CPU_AVX512_VPCLMULQDQ bit is set in _features.
static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
|
||||
// True when the CPU_AVX512_VAES bit is set in _features.
static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; }
|
||||
// True when the CPU_GFNI bit (vector GFNI instructions) is set in _features.
static bool supports_gfni() { return (_features & CPU_GFNI) != 0; }
|
||||
// True when the CPU_AVX512_VNNI bit is set in _features.
static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; }
|
||||
// True when the CPU_AVX512_BITALG bit (vector sub-word popcount and bit gather
// instructions) is set in _features.
static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
|
||||
// True when the CPU_AVX512_VBMI bit (vector BMI instructions) is set in _features.
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
|
||||
// True when the CPU_AVX512_VBMI2 bit is set in _features.
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
|
||||
// True when the CPU_HV bit (hypervisor instructions) is set in _features.
static bool supports_hv() { return (_features & CPU_HV) != 0; }
|
||||
|
||||
@ -1241,10 +1241,20 @@ static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use
|
||||
return vector_length_encoding(def);
|
||||
}
|
||||
|
||||
// Predicate helper for the vector popcount match rules: returns true when the
// CPU feature matching element type bt is available -- AVX512_BITALG for
// subword types, AVX512_VPOPCNTDQ for non-subword integral types. Rules use
// it to choose between the EVEX popcount rules and the AVX fallback rule.
static inline bool is_vector_popcount_predicate(BasicType bt) {
|
||||
return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
|
||||
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
|
||||
}
|
||||
|
||||
// Returns true when every bit of BoolTest::unsigned_compare is set in bt.
// Note that bt here is a BoolTest condition code passed as int, not a BasicType.
static inline bool is_unsigned_booltest_pred(int bt) {
|
||||
return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
|
||||
}
|
||||
|
||||
// Predicate helper for the EVEX count-leading/trailing-zeros rules: true when
// bt is a non-subword integral type, AVX512CD is supported, and either
// AVX512VL is supported or the vector is exactly 64 bytes (vlen_bytes == 64).
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
|
||||
return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
|
||||
(VM_Version::supports_avx512vl() || vlen_bytes == 64);
|
||||
}
|
||||
|
||||
class Node::PD {
|
||||
public:
|
||||
enum NodeFlags {
|
||||
@ -1405,12 +1415,12 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
}
|
||||
break;
|
||||
case Op_PopCountVI:
|
||||
if (!UsePopCountInstruction || (UseAVX < 2)) {
|
||||
if (UseAVX < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_PopCountVL:
|
||||
if (!UsePopCountInstruction || (UseAVX <= 2)) {
|
||||
if (UseAVX < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -1630,6 +1640,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressM:
|
||||
if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressV:
|
||||
case Op_ExpandV:
|
||||
if (!VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_SqrtF:
|
||||
if (UseSSE < 1) {
|
||||
return false;
|
||||
@ -1651,6 +1672,11 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
// Returns true when the CPU feature matching element type bt is available for
// vector popcount: AVX512_BITALG for subword types, AVX512_VPOPCNTDQ for
// non-subword integral types. Used by the match_rule_supported_vector*
// checks for Op_PopCountVI / Op_PopCountVL.
// NOTE(review): the condition is identical to is_vector_popcount_predicate();
// the two helpers could share one definition if both are visible here.
static inline bool is_pop_count_instr_target(BasicType bt) {
|
||||
return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
|
||||
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
|
||||
}
|
||||
|
||||
// Identify extra cases that we might want to provide match rules for vector nodes and
|
||||
// other intrinsics guarded with vector length (vlen) and element type (bt).
|
||||
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
||||
@ -1860,7 +1886,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
case Op_LoadVectorGatherMasked:
|
||||
case Op_StoreVectorScatterMasked:
|
||||
case Op_StoreVectorScatter:
|
||||
if(is_subword_type(bt)) {
|
||||
if (is_subword_type(bt)) {
|
||||
return false;
|
||||
} else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
@ -1887,6 +1913,23 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressM:
|
||||
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressV:
|
||||
case Op_ExpandV:
|
||||
if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
|
||||
return false;
|
||||
}
|
||||
if (size_in_bits < 128 ) {
|
||||
return false;
|
||||
}
|
||||
if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_VectorLongToMask:
|
||||
if (UseAVX < 1 || !is_LP64) {
|
||||
return false;
|
||||
@ -1902,14 +1945,22 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
}
|
||||
break;
|
||||
case Op_PopCountVI:
|
||||
if (!VM_Version::supports_avx512_vpopcntdq() &&
|
||||
(vlen == 16) && !VM_Version::supports_avx512bw()) {
|
||||
case Op_PopCountVL: {
|
||||
if (!is_pop_count_instr_target(bt) &&
|
||||
(size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Op_ReverseV:
|
||||
case Op_ReverseBytesV:
|
||||
if (UseAVX < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_PopCountVL:
|
||||
if (!VM_Version::supports_avx512_vpopcntdq() &&
|
||||
((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) {
|
||||
case Op_CountTrailingZerosV:
|
||||
case Op_CountLeadingZerosV:
|
||||
if (UseAVX < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -2057,9 +2108,20 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
|
||||
}
|
||||
return true;
|
||||
|
||||
case Op_PopCountVI:
|
||||
case Op_PopCountVL:
|
||||
if (!is_pop_count_instr_target(bt)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
case Op_MaskAll:
|
||||
return true;
|
||||
|
||||
case Op_CountLeadingZerosV:
|
||||
if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -8705,58 +8767,151 @@ instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
|
||||
|
||||
// --------------------------------- PopCount --------------------------------------
|
||||
|
||||
instruct vpopcountI_popcntd(vec dst, vec src) %{
|
||||
predicate(VM_Version::supports_avx512_vpopcntdq());
|
||||
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
|
||||
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %}
|
||||
match(Set dst (PopCountVL src));
|
||||
ins_cost(400);
|
||||
format %{ "vector_popcount_integral $dst, $src" %}
|
||||
ins_encode %{
|
||||
assert(UsePopCountInstruction, "not enabled");
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
|
||||
int opcode = this->ideal_Opcode();
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
|
||||
predicate(!VM_Version::supports_avx512_vpopcntdq());
|
||||
// Masked vector popcount (PopCountVI / PopCountVL with a kReg mask operand).
// Selected only when is_vector_popcount_predicate() holds for the element type
// of the source vector (input 1). Vector length and element type are both
// taken from $src rather than from the result node.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
|
||||
predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
|
||||
match(Set dst (PopCountVI src mask));
|
||||
match(Set dst (PopCountVL src mask));
|
||||
format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
// Copy src into dst before the masked operation; presumably the `true`
// argument requests merge-masking so unselected lanes keep the src value
// (TODO confirm against vector_popcount_integral_evex).
__ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
|
||||
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
|
||||
predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
|
||||
match(Set dst (PopCountVI src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
|
||||
format %{ "vector_popcount_int $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
|
||||
format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UsePopCountInstruction, "not enabled");
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
__ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
|
||||
$xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
int opcode = this->ideal_Opcode();
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
||||
if (VM_Version::supports_avx512vl()) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
} else {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vpopcountL_popcntd(vec dst, vec src) %{
|
||||
predicate(VM_Version::supports_avx512_vpopcntdq());
|
||||
match(Set dst (PopCountVL src));
|
||||
format %{ "vector_popcount_long $dst, $src\t! vector popcount packedL" %}
|
||||
// --------------------------------- Vector Trailing Zeros Count --------------------------------------
|
||||
|
||||
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
|
||||
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
|
||||
Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
effect(TEMP dst, TEMP xtmp, TEMP rtmp);
|
||||
ins_cost(400);
|
||||
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UsePopCountInstruction, "not enabled");
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
__ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (bt == T_LONG && rbt == T_INT) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
|
||||
predicate(!VM_Version::supports_avx512_vpopcntdq());
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
|
||||
format %{ "vector_popcount_long $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
|
||||
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
|
||||
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
|
||||
VM_Version::supports_avx512cd() &&
|
||||
(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
|
||||
ins_cost(400);
|
||||
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UsePopCountInstruction, "not enabled");
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
__ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
|
||||
$xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// CountTrailingZerosV for byte vectors on EVEX targets. Selected when the
// source element type is T_BYTE and VM_Version::supports_avx512vlbw() is true.
// Declares four vector temporaries, one opmask temporary and one pointer-sized
// GPR temporary, all of which are handed to vector_count_trailing_zeros_evex.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
|
||||
predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
|
||||
ins_cost(400);
|
||||
format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
|
||||
$ktmp$$KRegister, $rtmp$$Register, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// CountTrailingZerosV fallback for targets without AVX512VL: selected when
// AVX512VL is not supported and the source vector is shorter than 64 bytes.
// bt is the element type of the source, rbt the element type of the result;
// they differ when a long input produces an int result (see below).
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
|
||||
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
|
||||
match(Set dst (CountTrailingZerosV src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
|
||||
format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (bt == T_LONG && rbt == T_INT) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
// Narrow each 64-bit result to 32 bits: shuffle immediate 8 selects
// elements 0 and 2, first per dword within each 128-bit lane, then per
// qword across the vector, which presumably packs the low dwords
// contiguously at the bottom of dst (TODO confirm).
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
|
||||
// --------------------------------- Bitwise Ternary Logic ----------------------------------
|
||||
|
||||
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
|
||||
@ -9031,8 +9186,200 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp,
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// --------------------------------- Compress/Expand Operations ---------------------------
|
||||
|
||||
// Single rule for both CompressV and ExpandV with an opmask operand. The
// ideal opcode is read at encode time and passed to vector_compress_expand,
// which picks the actual operation. No predicate is attached here, so
// CPU-feature gating is left to the Matcher::match_rule_supported* checks.
// NOTE(review): the literal `false` argument is presumably the merge flag of
// vector_compress_expand -- confirm against its declaration.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
|
||||
match(Set dst (CompressV src mask));
|
||||
match(Set dst (ExpandV src mask));
|
||||
format %{ "vector_compress_expand $dst, $src, $mask" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
int vector_len = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// CompressM on an opmask register: produces the compressed mask in $dst from
// $mask via vector_mask_compress, using two long GPR temporaries and killing
// the flags register. The mask length passed is the vector length of this
// node; the input is asserted to be of vector-mask type.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
|
||||
match(Set dst (CompressM mask));
|
||||
effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
|
||||
format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(this->in(1)->bottom_type()->isa_vectmask(), "");
|
||||
int mask_len = Matcher::vector_length(this);
|
||||
__ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
#endif // _LP64
|
||||
|
||||
// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
|
||||
|
||||
// ReverseV (bit reversal within each element) for CPUs without GFNI; the
// GFNI-capable variant is vreverse_reg_gfni. Uses two vector temporaries and
// one int GPR temporary, all passed to vector_reverse_bit.
// NOTE(review): the format text says "_evex" and places '!' before the tab
// ("$src!\t using"), unlike other rules that print "\t! using" -- cosmetic
// only, it affects disassembly-style output, not generated code.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
|
||||
predicate(!VM_Version::supports_gfni());
|
||||
match(Set dst (ReverseV src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
|
||||
format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vec_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ReverseV (bit reversal within each element) for CPUs that report GFNI.
// A 64-bit constant is placed in the constant table and its address is handed
// to vector_reverse_bit_gfni together with one vector and one GPR temporary.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{
|
||||
predicate(VM_Version::supports_gfni());
|
||||
match(Set dst (ReverseV src));
|
||||
effect(TEMP dst, TEMP xtmp, TEMP rtmp);
|
||||
format %{ "vector_reverse_bit_gfni $dst, $src!\t using $rtmp and $xtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vec_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
// 0x8040201008040201 has exactly one bit set per byte, on a diagonal;
// presumably this is the GF(2) affine matrix that reverses the bits of
// each byte -- TODO confirm against vector_reverse_bit_gfni.
InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
|
||||
__ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
|
||||
addr, $rtmp$$Register, vec_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ReverseBytesV (byte-order reversal within each element). Selected when
// AVX512BW is supported or the vector is shorter than 64 bytes; the remaining
// case (64-byte vector without AVX512BW) is covered by vreverse_byte64_reg.
// Needs only one int GPR temporary.
instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{
|
||||
predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
|
||||
match(Set dst (ReverseBytesV src));
|
||||
effect(TEMP dst, TEMP rtmp);
|
||||
format %{ "vector_reverse_byte $dst, $src!\t using $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vec_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ReverseBytesV for 64-byte vectors on CPUs without AVX512BW -- the exact
// complement of the vreverse_byte_reg predicate. Uses two extra vector
// temporaries in addition to the GPR temporary and delegates to
// vector_reverse_byte64.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
|
||||
predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
|
||||
match(Set dst (ReverseBytesV src));
|
||||
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
|
||||
format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vec_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ---------------------------------- Vector Count Leading Zeros -----------------------------------
|
||||
|
||||
// CountLeadingZerosV for int/long vectors on EVEX targets. Selected when
// is_clz_non_subword_predicate_evex() holds for the source element type and
// source vector size in bytes. No temporaries are declared: xnoreg, k0 and
// noreg are passed for the helper's temporary and mask parameters.
// bt is the source element type, rbt the result element type.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
|
||||
predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
|
||||
Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (CountLeadingZerosV src));
|
||||
format %{ "vector_count_leading_zeros $dst, $src" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||
xnoreg, xnoreg, k0, noreg, true, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (rbt == T_INT && bt == T_LONG) {
|
||||
// Long input with int result: narrow the 64-bit counts in place.
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Masked form of CountLeadingZerosV, guarded by the same
// is_clz_non_subword_predicate_evex check as the unmasked rule.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    int enc = vector_length_encoding(this, $src);
    // Seed dst with src before the masked operation (presumably so that lanes
    // not selected by $mask keep the source value -- confirm against
    // vector_count_leading_zeros_evex's merge semantics).
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, enc);
    __ vector_count_leading_zeros_evex(src_bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// CountLeadingZerosV for short lanes. Requires AVX512CD and either AVX512VL
// or a 64-byte vector. Two scratch vector registers are passed to
// vector_count_leading_zeros_evex; the mask operand is k0.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  // "\t! ..." matches the separator used by the AVX variants of this rule.
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// CountLeadingZerosV for byte lanes on targets where
// VM_Version::supports_avx512vlbw() holds. Uses three scratch vector
// registers, a scratch mask register and a scratch pointer register, all of
// which are forwarded to vector_count_leading_zeros_evex.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // "\t! ..." matches the separator used by the AVX variants of this rule.
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// CountLeadingZerosV for int lanes when AVX512VL is unavailable and the
// vector is shorter than 64 bytes. The general-purpose scratch argument of
// vector_count_leading_zeros_avx is passed as noreg for this element type.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    int enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(src_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// CountLeadingZerosV for every element type other than int when AVX512VL is
// unavailable and the vector is shorter than 64 bytes. Unlike the int rule it
// also hands a scratch pointer register to vector_count_leading_zeros_avx.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Element type of the input and of the result; they can differ (see below).
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(src_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    // NOTE(review): evpmovqd looks like an EVEX-only narrowing move, yet this
    // rule is chosen only when AVX512VL is absent and the vector is < 64 bytes.
    // Confirm the instruction is legal on every target that reaches this path.
    if (src_bt == T_LONG && dst_bt == T_INT) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// ---------------------------------- Vector Masked Operations ------------------------------------
|
||||
|
||||
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
|
||||
|
||||
@ -4212,6 +4212,7 @@ bool MatchRule::is_vector() const {
|
||||
"SqrtVD","SqrtVF",
|
||||
"AndV" ,"XorV" ,"OrV",
|
||||
"MaxV", "MinV",
|
||||
"CompressV", "ExpandV", "CompressM",
|
||||
"AddReductionVI", "AddReductionVL",
|
||||
"AddReductionVF", "AddReductionVD",
|
||||
"MulReductionVI", "MulReductionVL",
|
||||
@ -4223,7 +4224,7 @@ bool MatchRule::is_vector() const {
|
||||
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
|
||||
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
|
||||
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
|
||||
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","PopulateIndex",
|
||||
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","ReverseV","ReverseBytesV",
|
||||
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
|
||||
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
|
||||
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
|
||||
@ -4232,7 +4233,8 @@ bool MatchRule::is_vector() const {
|
||||
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
|
||||
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
|
||||
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
|
||||
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "SignumVF", "SignumVD", "VectorLongToMask",
|
||||
"FmaVD","FmaVF","PopCountVI","PopCountVL","PopulateIndex","VectorLongToMask",
|
||||
"CountLeadingZerosV", "CountTrailingZerosV", "SignumVF", "SignumVD",
|
||||
// Next are vector mask ops.
|
||||
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
||||
"RoundVF", "RoundVD",
|
||||
|
||||
@ -935,7 +935,7 @@ class methodHandle;
|
||||
"Ljava/lang/Object;" \
|
||||
"J" \
|
||||
"Ljava/lang/Object;" \
|
||||
"I" \
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
|
||||
@ -950,7 +950,7 @@ class methodHandle;
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
|
||||
"Ljava/lang/Object;" \
|
||||
"I" \
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
|
||||
@ -962,8 +962,10 @@ class methodHandle;
|
||||
"I" \
|
||||
"Ljava/lang/Object;" \
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
|
||||
"Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
|
||||
"Ljava/lang/Object;" \
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)" \
|
||||
"V") \
|
||||
do_name(vector_store_op_name, "store") \
|
||||
\
|
||||
@ -977,7 +979,7 @@ class methodHandle;
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
|
||||
"Ljava/lang/Object;" \
|
||||
"I" \
|
||||
"J" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)" \
|
||||
"V") \
|
||||
do_name(vector_store_masked_op_name, "storeMasked") \
|
||||
@ -1137,6 +1139,17 @@ class methodHandle;
|
||||
"J") \
|
||||
do_name(vector_mask_oper_name, "maskReductionCoerced") \
|
||||
\
|
||||
do_intrinsic(_VectorCompressExpand, jdk_internal_vm_vector_VectorSupport, vector_compress_expand_op_name, vector_compress_expand_op_sig, F_S)\
|
||||
do_signature(vector_compress_expand_op_sig, "(I" \
|
||||
"Ljava/lang/Class;" \
|
||||
"Ljava/lang/Class;" \
|
||||
"Ljava/lang/Class;" \
|
||||
"I" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)" \
|
||||
"Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
|
||||
do_name(vector_compress_expand_op_name, "compressExpandOp") \
|
||||
/* (2) Bytecode intrinsics */ \
|
||||
\
|
||||
do_intrinsic(_park, jdk_internal_misc_Unsafe, park_name, park_signature, F_RN) \
|
||||
@ -1245,7 +1258,7 @@ enum class vmIntrinsicID : int {
|
||||
__IGNORE_CLASS, __IGNORE_NAME, __IGNORE_SIGNATURE, __IGNORE_ALIAS)
|
||||
|
||||
ID_LIMIT,
|
||||
LAST_COMPILER_INLINE = _VectorMaskOp,
|
||||
LAST_COMPILER_INLINE = _VectorCompressExpand,
|
||||
FIRST_MH_SIG_POLY = _invokeGeneric,
|
||||
FIRST_MH_STATIC = _linkToVirtual,
|
||||
LAST_MH_SIG_POLY = _linkToNative,
|
||||
|
||||
@ -715,6 +715,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_Continuation_doYield:
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_VectorCompressExpand:
|
||||
case vmIntrinsics::_VectorUnaryOp:
|
||||
case vmIntrinsics::_VectorBinaryOp:
|
||||
case vmIntrinsics::_VectorTernaryOp:
|
||||
|
||||
@ -51,6 +51,7 @@ macro(ReverseBytesI)
|
||||
macro(ReverseBytesL)
|
||||
macro(ReverseBytesUS)
|
||||
macro(ReverseBytesS)
|
||||
macro(ReverseBytesV)
|
||||
macro(CProj)
|
||||
macro(CacheWB)
|
||||
macro(CacheWBPreSync)
|
||||
@ -74,6 +75,8 @@ macro(Catch)
|
||||
macro(CatchProj)
|
||||
macro(CheckCastPP)
|
||||
macro(ClearArray)
|
||||
macro(CompressBits)
|
||||
macro(ExpandBits)
|
||||
macro(ConstraintCast)
|
||||
macro(CMoveD)
|
||||
macro(CMoveVD)
|
||||
@ -152,8 +155,10 @@ macro(LongCountedLoop)
|
||||
macro(LongCountedLoopEnd)
|
||||
macro(CountLeadingZerosI)
|
||||
macro(CountLeadingZerosL)
|
||||
macro(CountLeadingZerosV)
|
||||
macro(CountTrailingZerosI)
|
||||
macro(CountTrailingZerosL)
|
||||
macro(CountTrailingZerosV)
|
||||
macro(CreateEx)
|
||||
macro(DecodeN)
|
||||
macro(DecodeNKlass)
|
||||
@ -285,6 +290,9 @@ macro(RShiftL)
|
||||
macro(Region)
|
||||
macro(Rethrow)
|
||||
macro(Return)
|
||||
macro(ReverseI)
|
||||
macro(ReverseL)
|
||||
macro(ReverseV)
|
||||
macro(Root)
|
||||
macro(RoundDouble)
|
||||
macro(RoundDoubleMode)
|
||||
@ -424,6 +432,9 @@ macro(MinV)
|
||||
macro(MaxV)
|
||||
macro(MinReductionV)
|
||||
macro(MaxReductionV)
|
||||
macro(CompressV)
|
||||
macro(CompressM)
|
||||
macro(ExpandV)
|
||||
macro(LoadVector)
|
||||
macro(LoadVectorGather)
|
||||
macro(LoadVectorGatherMasked)
|
||||
|
||||
@ -700,6 +700,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
return inline_vector_insert();
|
||||
case vmIntrinsics::_VectorExtract:
|
||||
return inline_vector_extract();
|
||||
case vmIntrinsics::_VectorCompressExpand:
|
||||
return inline_vector_compress_expand();
|
||||
|
||||
case vmIntrinsics::_getObjectSize:
|
||||
return inline_getObjectSize();
|
||||
|
||||
@ -344,6 +344,8 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_vector_convert();
|
||||
bool inline_vector_extract();
|
||||
bool inline_vector_insert();
|
||||
bool inline_vector_compress_expand();
|
||||
|
||||
Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
|
||||
|
||||
enum VectorMaskUseType {
|
||||
|
||||
@ -974,6 +974,9 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
|
||||
case Op_RoundD: {
|
||||
body_size += Matcher::scalar_op_pre_select_sz_estimate(n->Opcode(), n->bottom_type()->basic_type());
|
||||
} break;
|
||||
case Op_CountTrailingZerosV:
|
||||
case Op_CountLeadingZerosV:
|
||||
case Op_ReverseV:
|
||||
case Op_RoundVF:
|
||||
case Op_RoundVD:
|
||||
case Op_PopCountVI:
|
||||
|
||||
@ -2254,6 +2254,9 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
|
||||
case Op_MacroLogicV:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_VectorCmpMasked:
|
||||
case Op_CompressV:
|
||||
case Op_CompressM:
|
||||
case Op_ExpandV:
|
||||
case Op_VectorLoadMask:
|
||||
set_shared(n); // Force result into register (it will be anyways)
|
||||
break;
|
||||
|
||||
@ -389,4 +389,20 @@ public:
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
};
|
||||
|
||||
//------------------------------CompressBitsNode-------------------------------
|
||||
// CompressBits placeholder node
|
||||
class CompressBitsNode : public Node {
|
||||
public:
|
||||
CompressBitsNode(Node *in1, Node *in2) : Node(0,in1,in2) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------ExpandBitsNode---------------------------------
|
||||
// ExpandBits placeholder node
|
||||
class ExpandBitsNode : public Node {
|
||||
public:
|
||||
ExpandBitsNode(Node *in1, Node *in2) : Node(0,in1,in2) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_MULNODE_HPP
|
||||
|
||||
@ -175,6 +175,9 @@ class VectorUnboxNode;
|
||||
class VectorSet;
|
||||
class VectorReinterpretNode;
|
||||
class ShiftVNode;
|
||||
class ExpandVNode;
|
||||
class CompressVNode;
|
||||
class CompressMNode;
|
||||
|
||||
|
||||
#ifndef OPTO_DU_ITERATOR_ASSERT
|
||||
@ -704,6 +707,9 @@ public:
|
||||
DEFINE_CLASS_ID(VectorUnbox, Vector, 1)
|
||||
DEFINE_CLASS_ID(VectorReinterpret, Vector, 2)
|
||||
DEFINE_CLASS_ID(ShiftV, Vector, 3)
|
||||
DEFINE_CLASS_ID(CompressV, Vector, 4)
|
||||
DEFINE_CLASS_ID(ExpandV, Vector, 5)
|
||||
DEFINE_CLASS_ID(CompressM, Vector, 6)
|
||||
|
||||
DEFINE_CLASS_ID(Proj, Node, 3)
|
||||
DEFINE_CLASS_ID(CatchProj, Proj, 0)
|
||||
@ -777,7 +783,8 @@ public:
|
||||
Flag_is_predicated_vector = 1 << 14,
|
||||
Flag_for_post_loop_opts_igvn = 1 << 15,
|
||||
Flag_is_removed_by_peephole = 1 << 16,
|
||||
_last_flag = Flag_is_removed_by_peephole
|
||||
Flag_is_predicated_using_blend = 1 << 17,
|
||||
_last_flag = Flag_is_predicated_using_blend
|
||||
};
|
||||
|
||||
class PD;
|
||||
@ -931,7 +938,10 @@ public:
|
||||
DEFINE_CLASS_QUERY(Vector)
|
||||
DEFINE_CLASS_QUERY(VectorMaskCmp)
|
||||
DEFINE_CLASS_QUERY(VectorUnbox)
|
||||
DEFINE_CLASS_QUERY(VectorReinterpret);
|
||||
DEFINE_CLASS_QUERY(VectorReinterpret)
|
||||
DEFINE_CLASS_QUERY(CompressV)
|
||||
DEFINE_CLASS_QUERY(ExpandV)
|
||||
DEFINE_CLASS_QUERY(CompressM)
|
||||
DEFINE_CLASS_QUERY(LoadVector)
|
||||
DEFINE_CLASS_QUERY(LoadVectorGather)
|
||||
DEFINE_CLASS_QUERY(StoreVector)
|
||||
@ -989,6 +999,8 @@ public:
|
||||
|
||||
bool is_predicated_vector() const { return (_flags & Flag_is_predicated_vector) != 0; }
|
||||
|
||||
bool is_predicated_using_blend() const { return (_flags & Flag_is_predicated_using_blend) != 0; }
|
||||
|
||||
// Used in lcm to mark nodes that have scheduled
|
||||
bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }
|
||||
|
||||
|
||||
@ -548,4 +548,24 @@ public:
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
};
|
||||
|
||||
//-------------------------------ReverseINode--------------------------------
|
||||
// reverse bits of an int
|
||||
class ReverseINode : public Node {
|
||||
public:
|
||||
ReverseINode(Node *c, Node *in1) : Node(c, in1) {}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return TypeInt::INT; }
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
};
|
||||
|
||||
//-------------------------------ReverseLNode--------------------------------
|
||||
// reverse bits of a long
|
||||
class ReverseLNode : public Node {
|
||||
public:
|
||||
ReverseLNode(Node *c, Node *in1) : Node(c, in1) {}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return TypeLong::LONG; }
|
||||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_SUBNODE_HPP
|
||||
|
||||
@ -2585,7 +2585,9 @@ bool SuperWord::output() {
|
||||
opc == Op_AbsI || opc == Op_AbsL ||
|
||||
opc == Op_NegF || opc == Op_NegD ||
|
||||
opc == Op_RoundF || opc == Op_RoundD ||
|
||||
opc == Op_PopCountI || opc == Op_PopCountL) {
|
||||
opc == Op_PopCountI || opc == Op_PopCountL ||
|
||||
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
|
||||
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
Node* in = vector_opd(p, 1);
|
||||
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
||||
@ -3092,9 +3094,9 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (VectorNode::is_vpopcnt_long(use)) {
|
||||
// VPOPCNT_LONG takes long and produces int - hence the special checks
|
||||
// on alignment and size.
|
||||
if (VectorNode::is_type_transition_long_to_int(use)) {
|
||||
// PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces
|
||||
// int - hence the special checks on alignment and size.
|
||||
if (u_pk->size() != d_pk->size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -80,9 +80,12 @@ bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicTyp
|
||||
}
|
||||
|
||||
if (is_supported) {
|
||||
// Check whether mask unboxing is supported.
|
||||
// Check if mask unboxing is supported, this is a two step process which first loads the contents
|
||||
// of boolean array into vector followed by either lane expansion to match the lane size of masked
|
||||
// vector operation or populate the predicate register.
|
||||
if ((mask_use_type & VecMaskUseLoad) != 0) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt)) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt) ||
|
||||
!Matcher::match_rule_supported_vector(Op_LoadVector, num_elem, T_BOOLEAN)) {
|
||||
#ifndef PRODUCT
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
|
||||
@ -260,9 +263,12 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check whether mask unboxing is supported.
|
||||
// Check if mask unboxing is supported, this is a two step process which first loads the contents
|
||||
// of boolean array into vector followed by either lane expansion to match the lane size of masked
|
||||
// vector operation or populate the predicate register.
|
||||
if ((mask_use_type & VecMaskUseLoad) != 0) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type) ||
|
||||
!Matcher::match_rule_supported_vector(Op_LoadVector, num_elem, T_BOOLEAN)) {
|
||||
#ifndef PRODUCT
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
|
||||
@ -273,9 +279,12 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
|
||||
}
|
||||
}
|
||||
|
||||
// Check whether mask boxing is supported.
|
||||
// Check if mask boxing is supported, this is a two step process which first stores the contents
|
||||
// of mask vector / predicate register into a boolean vector followed by vector store operation to
|
||||
// transfer the contents to the underlying storage of mask boxes, which is a boolean array.
|
||||
if ((mask_use_type & VecMaskUseStore) != 0) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) {
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type) ||
|
||||
!Matcher::match_rule_supported_vector(Op_StoreVector, num_elem, T_BOOLEAN)) {
|
||||
#ifndef PRODUCT
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr("Rejected vector mask storing (%s,%s,%d) because architecture does not support it",
|
||||
@ -560,6 +569,7 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
|
||||
operation->add_req(mask);
|
||||
operation->add_flag(Node::Flag_is_predicated_vector);
|
||||
} else {
|
||||
operation->add_flag(Node::Flag_is_predicated_using_blend);
|
||||
operation = gvn().transform(operation);
|
||||
operation = new VectorBlendNode(opd1, operation, mask);
|
||||
}
|
||||
@ -695,16 +705,8 @@ bool LibraryCallKit::inline_vector_mask_operation() {
|
||||
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
|
||||
BasicType elem_bt = elem_type->basic_type();
|
||||
|
||||
if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
|
||||
Op_LoadVector, num_elem, type2name(T_BOOLEAN));
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
|
||||
int mopc = VectorSupport::vop2ideal(oper->get_con(), elem_bt);
|
||||
if (!arch_supports_vector(mopc, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
if (!arch_supports_vector(mopc, num_elem, elem_bt, VecMaskUseLoad)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s",
|
||||
mopc, num_elem, type2name(elem_bt));
|
||||
@ -937,7 +939,7 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
|
||||
// S extends VectorSpecies<E>>
|
||||
// VM load(Class<? extends VM> vmClass, Class<E> elementType, int length,
|
||||
// Object base, long offset, // Unsafe addressing
|
||||
// C container, int index, S s, // Arguments for default implementation
|
||||
// C container, long index, S s, // Arguments for default implementation
|
||||
// LoadOperation<C, VM, E, S> defaultImpl)
|
||||
//
|
||||
// public static
|
||||
@ -946,7 +948,7 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
|
||||
// void store(Class<?> vectorClass, Class<?> elementType, int length,
|
||||
// Object base, long offset, // Unsafe addressing
|
||||
// V v,
|
||||
// C container, int index, // Arguments for default implementation
|
||||
// C container, long index, // Arguments for default implementation
|
||||
// StoreVectorOperation<C, V> defaultImpl)
|
||||
|
||||
bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
|
||||
@ -1049,16 +1051,6 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
|
||||
}
|
||||
}
|
||||
if (is_mask) {
|
||||
if (!arch_supports_vector(Op_LoadVector, num_elem, T_BOOLEAN, VecMaskNotUsed)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: arity=%d op=%s/mask vlen=%d etype=bit ismask=no",
|
||||
is_store, is_store ? "store" : "load",
|
||||
num_elem);
|
||||
}
|
||||
set_map(old_map);
|
||||
set_sp(old_sp);
|
||||
return false; // not supported
|
||||
}
|
||||
if (!is_store) {
|
||||
if (!arch_supports_vector(Op_LoadVector, num_elem, elem_bt, VecMaskUseLoad)) {
|
||||
set_map(old_map);
|
||||
@ -1096,7 +1088,9 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
|
||||
const TypeVect* to_vect_type = TypeVect::make(T_BYTE, store_num_elem);
|
||||
val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
|
||||
}
|
||||
|
||||
if (is_mask) {
|
||||
val = gvn().transform(VectorStoreMaskNode::make(gvn(), val, elem_bt, num_elem));
|
||||
}
|
||||
Node* vstore = gvn().transform(StoreVectorNode::make(0, control(), memory(addr), addr, addr_type, val, store_num_elem));
|
||||
set_memory(vstore, addr_type);
|
||||
} else {
|
||||
@ -1138,7 +1132,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
|
||||
// M extends VectorMask<E>>
|
||||
// V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
|
||||
// int length, Object base, long offset, M m,
|
||||
// C container, int index, S s, // Arguments for default implementation
|
||||
// C container, long index, S s, // Arguments for default implementation
|
||||
// LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
|
||||
//
|
||||
// public static
|
||||
@ -1149,7 +1143,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
|
||||
// void storeMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
|
||||
// int length, Object base, long offset,
|
||||
// V v, M m,
|
||||
// C container, int index, // Arguments for default implementation
|
||||
// C container, long index, // Arguments for default implementation
|
||||
// StoreVectorMaskedOperation<C, V, M, E> defaultImpl) {
|
||||
//
|
||||
bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
|
||||
@ -2736,3 +2730,97 @@ bool LibraryCallKit::inline_vector_extract() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// public static
|
||||
// <V extends Vector<E>,
|
||||
// M extends VectorMask<E>,
|
||||
// E>
|
||||
// V compressExpandOp(int opr,
|
||||
// Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
|
||||
// int length, V v, M m,
|
||||
// CompressExpandOperation<V, M> defaultImpl)
|
||||
bool LibraryCallKit::inline_vector_compress_expand() {
|
||||
const TypeInt* opr = gvn().type(argument(0))->isa_int();
|
||||
const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
|
||||
const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr();
|
||||
const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr();
|
||||
const TypeInt* vlen = gvn().type(argument(4))->isa_int();
|
||||
|
||||
if (vector_klass == NULL || elem_klass == NULL || mask_klass == NULL || vlen == NULL ||
|
||||
vector_klass->const_oop() == NULL || mask_klass->const_oop() == NULL ||
|
||||
elem_klass->const_oop() == NULL || !vlen->is_con() || !opr->is_con()) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** missing constant: opr=%s vclass=%s mclass=%s etype=%s vlen=%s",
|
||||
NodeClassNames[argument(0)->Opcode()],
|
||||
NodeClassNames[argument(1)->Opcode()],
|
||||
NodeClassNames[argument(2)->Opcode()],
|
||||
NodeClassNames[argument(3)->Opcode()],
|
||||
NodeClassNames[argument(4)->Opcode()]);
|
||||
}
|
||||
return false; // not enough info for intrinsification
|
||||
}
|
||||
|
||||
if (!is_klass_initialized(vector_klass) || !is_klass_initialized(mask_klass)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** klass argument not initialized");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
|
||||
if (!elem_type->is_primitive_type()) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not a primitive bt=%d", elem_type->basic_type());
|
||||
}
|
||||
return false; // should be primitive type
|
||||
}
|
||||
|
||||
int num_elem = vlen->get_con();
|
||||
BasicType elem_bt = elem_type->basic_type();
|
||||
int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
|
||||
|
||||
if (!arch_supports_vector(opc, num_elem, elem_bt, VecMaskUseLoad)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** not supported: opc=%d vlen=%d etype=%s ismask=useload",
|
||||
opc, num_elem, type2name(elem_bt));
|
||||
}
|
||||
return false; // not supported
|
||||
}
|
||||
|
||||
Node* opd1 = NULL;
|
||||
const TypeInstPtr* vbox_type = NULL;
|
||||
if (opc != Op_CompressM) {
|
||||
ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
|
||||
vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
|
||||
opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
|
||||
if (opd1 == NULL) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** unbox failed vector=%s",
|
||||
NodeClassNames[argument(5)->Opcode()]);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
|
||||
assert(is_vector_mask(mbox_klass), "argument(6) should be a mask class");
|
||||
const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
|
||||
|
||||
Node* mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem);
|
||||
if (mask == NULL) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** unbox failed mask=%s",
|
||||
NodeClassNames[argument(6)->Opcode()]);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const TypeVect* vt = TypeVect::make(elem_bt, num_elem, opc == Op_CompressM);
|
||||
Node* operation = gvn().transform(VectorNode::make(opc, opd1, mask, vt));
|
||||
|
||||
// Wrap it up in VectorBox to keep object type information.
|
||||
const TypeInstPtr* box_type = opc == Op_CompressM ? mbox_type : vbox_type;
|
||||
Node* vbox = box_vector(operation, box_type, elem_bt, num_elem);
|
||||
set_result(vbox);
|
||||
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -162,11 +162,22 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
case Op_RoundD:
|
||||
return (bt == T_LONG ? Op_RoundVD : 0);
|
||||
case Op_PopCountI:
|
||||
// Unimplemented for subword types since bit count changes
|
||||
// depending on size of lane (and sign bit).
|
||||
return (bt == T_INT ? Op_PopCountVI : 0);
|
||||
return Op_PopCountVI;
|
||||
case Op_PopCountL:
|
||||
return Op_PopCountVL;
|
||||
case Op_ReverseI:
|
||||
case Op_ReverseL:
|
||||
return (is_integral_type(bt) ? Op_ReverseV : 0);
|
||||
case Op_ReverseBytesS:
|
||||
case Op_ReverseBytesI:
|
||||
case Op_ReverseBytesL:
|
||||
return (is_integral_type(bt) ? Op_ReverseBytesV : 0);
|
||||
case Op_CompressBits:
|
||||
// Not implemented. Returning 0 temporarily
|
||||
return 0;
|
||||
case Op_ExpandBits:
|
||||
// Not implemented. Returning 0 temporarily
|
||||
return 0;
|
||||
case Op_LShiftI:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
@ -245,6 +256,12 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
return Op_VectorCastF2X;
|
||||
case Op_ConvD2L:
|
||||
return Op_VectorCastD2X;
|
||||
case Op_CountLeadingZerosI:
|
||||
case Op_CountLeadingZerosL:
|
||||
return Op_CountLeadingZerosV;
|
||||
case Op_CountTrailingZerosI:
|
||||
case Op_CountTrailingZerosL:
|
||||
return Op_CountTrailingZerosV;
|
||||
case Op_SignumF:
|
||||
return Op_SignumVF;
|
||||
case Op_SignumD:
|
||||
@ -317,16 +334,17 @@ bool VectorNode::is_muladds2i(Node* n) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool VectorNode::is_vpopcnt_long(Node* n) {
|
||||
if (n->Opcode() == Op_PopCountL) {
|
||||
return true;
|
||||
bool VectorNode::is_type_transition_long_to_int(Node* n) {
|
||||
switch(n->Opcode()) {
|
||||
case Op_PopCountL:
|
||||
case Op_CountLeadingZerosL:
|
||||
case Op_CountTrailingZerosL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool VectorNode::is_roundopD(Node* n) {
|
||||
if (n->Opcode() == Op_RoundDoubleMode) {
|
||||
return true;
|
||||
@ -595,6 +613,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
|
||||
case Op_NegVF: return new NegVFNode(n1, vt);
|
||||
case Op_NegVD: return new NegVDNode(n1, vt);
|
||||
|
||||
case Op_ReverseV: return new ReverseVNode(n1, vt);
|
||||
case Op_ReverseBytesV: return new ReverseBytesVNode(n1, vt);
|
||||
|
||||
case Op_SqrtVF: return new SqrtVFNode(n1, vt);
|
||||
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
|
||||
|
||||
@ -628,6 +649,12 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
|
||||
case Op_RoundDoubleModeV: return new RoundDoubleModeVNode(n1, n2, vt);
|
||||
|
||||
case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt);
|
||||
|
||||
case Op_ExpandV: return new ExpandVNode(n1, n2, vt);
|
||||
case Op_CompressV: return new CompressVNode(n1, n2, vt);
|
||||
case Op_CompressM: assert(n1 == NULL, ""); return new CompressMNode(n2, vt);
|
||||
case Op_CountLeadingZerosV: return new CountLeadingZerosVNode(n1, vt);
|
||||
case Op_CountTrailingZerosV: return new CountTrailingZerosVNode(n1, vt);
|
||||
default:
|
||||
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
|
||||
return NULL;
|
||||
@ -1669,6 +1696,38 @@ Node* NegVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Identity transformation for a vector byte reversal.
//
// Folds a reversal of a reversal back to the innermost input, but only when
// both reversals are known to act on the same lanes:
//   - both nodes are predicated and share the very same mask input, or
//   - neither node is predicated.
// A predicated/unpredicated pair, or two predicated nodes with different
// masks, is NOT folded: the two operations would touch different lane sets,
// so removing them would change the result.
//
// Returns the replacement node, or 'this' when no folding applies.
Node* ReverseBytesVNode::Identity(PhaseGVN* phase) {
  // No folding is attempted when is_predicated_using_blend() holds.
  if (is_predicated_using_blend()) {
    return this;
  }
  if (in(1)->Opcode() == Op_ReverseBytesV) {
    const bool outer_predicated = is_predicated_vector();
    const bool inner_predicated = in(1)->is_predicated_vector();
    if (outer_predicated && inner_predicated) {
      // ReverseBytesV (ReverseBytesV X , MASK) , MASK => X
      if (in(2) == in(1)->in(2)) {
        return in(1)->in(1);
      }
    } else if (!outer_predicated && !inner_predicated) {
      // ReverseBytesV (ReverseBytesV X) => X
      return in(1)->in(1);
    }
    // Mixed predication or differing masks: keep both nodes.
  }
  return this;
}
|
||||
|
||||
// Identity transformation for a vector bit reversal.
//
// Folds a reversal of a reversal back to the innermost input, but only when
// both reversals are known to act on the same lanes:
//   - both nodes are predicated and share the very same mask input, or
//   - neither node is predicated.
// A predicated/unpredicated pair, or two predicated nodes with different
// masks, is NOT folded: the two operations would touch different lane sets,
// so removing them would change the result.
//
// Returns the replacement node, or 'this' when no folding applies.
Node* ReverseVNode::Identity(PhaseGVN* phase) {
  // No folding is attempted when is_predicated_using_blend() holds.
  if (is_predicated_using_blend()) {
    return this;
  }
  if (in(1)->Opcode() == Op_ReverseV) {
    const bool outer_predicated = is_predicated_vector();
    const bool inner_predicated = in(1)->is_predicated_vector();
    if (outer_predicated && inner_predicated) {
      // ReverseV (ReverseV X , MASK) , MASK => X
      if (in(2) == in(1)->in(2)) {
        return in(1)->in(1);
      }
    } else if (!outer_predicated && !inner_predicated) {
      // ReverseV (ReverseV X) => X
      return in(1)->in(1);
    }
    // Mixed predication or differing masks: keep both nodes.
  }
  return this;
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
|
||||
CallStaticJavaNode::dump_spec(st);
|
||||
|
||||
@ -93,7 +93,7 @@ class VectorNode : public TypeNode {
|
||||
static bool is_type_transition_short_to_int(Node* n);
|
||||
static bool is_type_transition_to_int(Node* n);
|
||||
static bool is_muladds2i(Node* n);
|
||||
static bool is_vpopcnt_long(Node* n);
|
||||
static bool is_type_transition_long_to_int(Node* n);
|
||||
static bool is_roundopD(Node* n);
|
||||
static bool is_scalar_rotate(Node* n);
|
||||
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
||||
@ -769,6 +769,37 @@ public:
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------CompressVNode--------------------------------------
|
||||
// Vector compress
|
||||
class CompressVNode: public VectorNode {
|
||||
public:
|
||||
CompressVNode(Node* vec, Node* mask, const TypeVect* vt) :
|
||||
VectorNode(vec, mask, vt) {
|
||||
init_class_id(Class_CompressV);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class CompressMNode: public VectorNode {
|
||||
public:
|
||||
CompressMNode(Node* mask, const TypeVect* vt) :
|
||||
VectorNode(mask, vt) {
|
||||
init_class_id(Class_CompressM);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------ExpandVNode--------------------------------------
|
||||
// Vector expand
|
||||
class ExpandVNode: public VectorNode {
|
||||
public:
|
||||
ExpandVNode(Node* vec, Node* mask, const TypeVect* vt) :
|
||||
VectorNode(vec, mask, vt) {
|
||||
init_class_id(Class_ExpandV);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//================================= M E M O R Y ===============================
|
||||
|
||||
//------------------------------LoadVectorNode---------------------------------
|
||||
@ -1392,7 +1423,6 @@ class VectorBlendNode : public VectorNode {
|
||||
public:
|
||||
VectorBlendNode(Node* vec1, Node* vec2, Node* mask)
|
||||
: VectorNode(vec1, vec2, mask, vec1->bottom_type()->is_vect()) {
|
||||
// assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
@ -1675,6 +1705,40 @@ public:
|
||||
Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
};
|
||||
|
||||
class CountLeadingZerosVNode : public VectorNode {
|
||||
public:
|
||||
CountLeadingZerosVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class CountTrailingZerosVNode : public VectorNode {
|
||||
public:
|
||||
CountTrailingZerosVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class ReverseVNode : public VectorNode {
|
||||
public:
|
||||
ReverseVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class ReverseBytesVNode : public VectorNode {
|
||||
public:
|
||||
ReverseBytesVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class SignumVFNode : public VectorNode {
|
||||
public:
|
||||
SignumVFNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
|
||||
@ -1690,4 +1754,5 @@ public:
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_VECTORNODE_HPP
|
||||
|
||||
@ -443,6 +443,109 @@ int VectorSupport::vop2ideal(jint id, BasicType bt) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_EXPAND: {
|
||||
switch (bt) {
|
||||
case T_BYTE: // fall-through
|
||||
case T_SHORT: // fall-through
|
||||
case T_INT: // fall-through
|
||||
case T_LONG: // fall-through
|
||||
case T_FLOAT: // fall-through
|
||||
case T_DOUBLE: return Op_ExpandV;
|
||||
default: fatal("EXPAND: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_COMPRESS: {
|
||||
switch (bt) {
|
||||
case T_BYTE: // fall-through
|
||||
case T_SHORT: // fall-through
|
||||
case T_INT: // fall-through
|
||||
case T_LONG: // fall-through
|
||||
case T_FLOAT: // fall-through
|
||||
case T_DOUBLE: return Op_CompressV;
|
||||
default: fatal("COMPRESS: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_MASK_COMPRESS: {
|
||||
switch (bt) {
|
||||
case T_BYTE: // fall-through
|
||||
case T_SHORT: // fall-through
|
||||
case T_INT: // fall-through
|
||||
case T_LONG: // fall-through
|
||||
case T_FLOAT: // fall-through
|
||||
case T_DOUBLE: return Op_CompressM;
|
||||
default: fatal("MASK_COMPRESS: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_BIT_COUNT: {
|
||||
switch (bt) {
|
||||
case T_BYTE: // Returning Op_PopCountI
|
||||
case T_SHORT: // for byte and short types temporarily
|
||||
case T_INT: return Op_PopCountI;
|
||||
case T_LONG: return Op_PopCountL;
|
||||
default: fatal("BIT_COUNT: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_TZ_COUNT: {
|
||||
switch (bt) {
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT: return Op_CountTrailingZerosI;
|
||||
case T_LONG: return Op_CountTrailingZerosL;
|
||||
default: fatal("TZ_COUNT: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_LZ_COUNT: {
|
||||
switch (bt) {
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT: return Op_CountLeadingZerosI;
|
||||
case T_LONG: return Op_CountLeadingZerosL;
|
||||
default: fatal("LZ_COUNT: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_REVERSE: {
|
||||
switch (bt) {
|
||||
case T_BYTE: // Temporarily returning
|
||||
case T_SHORT: // Op_ReverseI for byte and short
|
||||
case T_INT: return Op_ReverseI;
|
||||
case T_LONG: return Op_ReverseL;
|
||||
default: fatal("REVERSE: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_REVERSE_BYTES: {
|
||||
switch (bt) {
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT: return Op_ReverseBytesI;
|
||||
case T_LONG: return Op_ReverseBytesL;
|
||||
default: fatal("REVERSE_BYTES: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_COMPRESS_BITS: {
|
||||
switch (bt) {
|
||||
case T_INT:
|
||||
case T_LONG: return Op_CompressBits;
|
||||
default: fatal("COMPRESS_BITS: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VECTOR_OP_EXPAND_BITS: {
|
||||
switch (bt) {
|
||||
case T_INT:
|
||||
case T_LONG: return Op_ExpandBits;
|
||||
default: fatal("EXPAND_BITS: %s", type2name(bt));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case VECTOR_OP_TAN:
|
||||
case VECTOR_OP_TANH:
|
||||
case VECTOR_OP_SIN:
|
||||
|
||||
@ -54,6 +54,7 @@ class VectorSupport : AllStatic {
|
||||
VECTOR_OP_ABS = 0,
|
||||
VECTOR_OP_NEG = 1,
|
||||
VECTOR_OP_SQRT = 2,
|
||||
VECTOR_OP_BIT_COUNT = 3,
|
||||
|
||||
// Binary
|
||||
VECTOR_OP_ADD = 4,
|
||||
@ -89,6 +90,17 @@ class VectorSupport : AllStatic {
|
||||
VECTOR_OP_LROTATE = 24,
|
||||
VECTOR_OP_RROTATE = 25,
|
||||
|
||||
VECTOR_OP_COMPRESS = 26,
|
||||
VECTOR_OP_EXPAND = 27,
|
||||
VECTOR_OP_MASK_COMPRESS = 28,
|
||||
|
||||
VECTOR_OP_TZ_COUNT = 29,
|
||||
VECTOR_OP_LZ_COUNT = 30,
|
||||
VECTOR_OP_REVERSE = 31,
|
||||
VECTOR_OP_REVERSE_BYTES = 32,
|
||||
VECTOR_OP_COMPRESS_BITS = 33,
|
||||
VECTOR_OP_EXPAND_BITS = 34,
|
||||
|
||||
// Vector Math Library
|
||||
VECTOR_OP_TAN = 101,
|
||||
VECTOR_OP_TANH = 102,
|
||||
|
||||
@ -1775,6 +1775,9 @@
|
||||
declare_c2_type(FmaVFNode, VectorNode) \
|
||||
declare_c2_type(CMoveVFNode, VectorNode) \
|
||||
declare_c2_type(CMoveVDNode, VectorNode) \
|
||||
declare_c2_type(CompressVNode, VectorNode) \
|
||||
declare_c2_type(CompressMNode, VectorNode) \
|
||||
declare_c2_type(ExpandVNode, VectorNode) \
|
||||
declare_c2_type(MulReductionVDNode, ReductionNode) \
|
||||
declare_c2_type(DivVFNode, VectorNode) \
|
||||
declare_c2_type(DivVDNode, VectorNode) \
|
||||
@ -1866,6 +1869,10 @@
|
||||
declare_c2_type(VectorUnboxNode, VectorNode) \
|
||||
declare_c2_type(VectorReinterpretNode, VectorNode) \
|
||||
declare_c2_type(VectorMaskCastNode, VectorNode) \
|
||||
declare_c2_type(CountLeadingZerosVNode, VectorNode) \
|
||||
declare_c2_type(CountTrailingZerosVNode, VectorNode) \
|
||||
declare_c2_type(ReverseBytesVNode, VectorNode) \
|
||||
declare_c2_type(ReverseVNode, VectorNode) \
|
||||
declare_c2_type(MaskAllNode, VectorNode) \
|
||||
declare_c2_type(AndVMaskNode, VectorNode) \
|
||||
declare_c2_type(OrVMaskNode, VectorNode) \
|
||||
|
||||
@ -721,6 +721,10 @@ inline bool is_integral_type(BasicType t) {
|
||||
return is_subword_type(t) || t == T_INT || t == T_LONG;
|
||||
}
|
||||
|
||||
// Returns true only for T_INT and T_LONG; every other BasicType yields false.
inline bool is_non_subword_integral_type(BasicType t) {
  switch (t) {
    case T_INT:
    case T_LONG:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Returns true only for T_FLOAT and T_DOUBLE; every other BasicType yields false.
inline bool is_floating_point_type(BasicType t) {
  switch (t) {
    case T_FLOAT:
    case T_DOUBLE:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
@ -32,8 +32,6 @@ import java.lang.annotation.Target;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.ref.Reference;
|
||||
import java.io.FileDescriptor;
|
||||
import java.nio.Buffer;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import jdk.internal.access.JavaNioAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
@ -313,59 +311,25 @@ public class ScopedMemoryAccess {
|
||||
}
|
||||
}
|
||||
|
||||
// ByteBuffer vector access ops
|
||||
|
||||
// Buffer access constants, to be initialized when required.
|
||||
// Avoids a null value for NIO_ACCESS, due to class initialization dependencies
|
||||
static final class BufferAccess {
|
||||
// Buffer.address
|
||||
static final long BUFFER_ADDRESS
|
||||
= UNSAFE.objectFieldOffset(Buffer.class, "address");
|
||||
|
||||
// ByteBuffer.hb
|
||||
static final long BYTE_BUFFER_HB
|
||||
= UNSAFE.objectFieldOffset(ByteBuffer.class, "hb");
|
||||
|
||||
static final long BYTE_BUFFER_IS_READ_ONLY
|
||||
= UNSAFE.objectFieldOffset(ByteBuffer.class, "isReadOnly");
|
||||
|
||||
@ForceInline
|
||||
static Object bufferBase(ByteBuffer bb) {
|
||||
return UNSAFE.getReference(bb, BYTE_BUFFER_HB);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
static long bufferAddress(ByteBuffer bb, long offset) {
|
||||
return UNSAFE.getLong(bb, BUFFER_ADDRESS) + offset;
|
||||
}
|
||||
|
||||
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
|
||||
|
||||
@ForceInline
|
||||
static MemorySessionImpl session(ByteBuffer bb) {
|
||||
MemorySegment segment = NIO_ACCESS.bufferSegment(bb);
|
||||
return segment != null ?
|
||||
((AbstractMemorySegmentImpl)segment).sessionImpl() : null;
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public static boolean isReadOnly(ByteBuffer bb) {
|
||||
return UNSAFE.getBoolean(bb, BufferAccess.BYTE_BUFFER_IS_READ_ONLY);
|
||||
}
|
||||
// MemorySegment vector access ops
|
||||
|
||||
@ForceInline
|
||||
public static
|
||||
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>>
|
||||
V loadFromByteBuffer(Class<? extends V> vmClass, Class<E> e, int length,
|
||||
ByteBuffer bb, int offset,
|
||||
S s,
|
||||
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
|
||||
V loadFromMemorySegment(Class<? extends V> vmClass, Class<E> e, int length,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
S s,
|
||||
VectorSupport.LoadOperation<AbstractMemorySegmentImpl, V, S> defaultImpl) {
|
||||
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
|
||||
if (msp.maxAlignMask() > 1) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
try {
|
||||
return loadFromByteBufferScoped(
|
||||
BufferAccess.session(bb),
|
||||
return loadFromMemorySegmentScopedInternal(
|
||||
msp.sessionImpl(),
|
||||
vmClass, e, length,
|
||||
bb, offset,
|
||||
msp, offset,
|
||||
s,
|
||||
defaultImpl);
|
||||
} catch (ScopedAccessError ex) {
|
||||
@ -377,22 +341,18 @@ public class ScopedMemoryAccess {
|
||||
@ForceInline
|
||||
private static
|
||||
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>>
|
||||
V loadFromByteBufferScoped(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<E> e, int length,
|
||||
ByteBuffer bb, int offset,
|
||||
S s,
|
||||
VectorSupport.LoadOperation<ByteBuffer, V, S> defaultImpl) {
|
||||
V loadFromMemorySegmentScopedInternal(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<E> e, int length,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
S s,
|
||||
VectorSupport.LoadOperation<AbstractMemorySegmentImpl, V, S> defaultImpl) {
|
||||
try {
|
||||
if (session != null) {
|
||||
session.checkValidState();
|
||||
}
|
||||
|
||||
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
|
||||
session.checkValidState();
|
||||
|
||||
return VectorSupport.load(vmClass, e, length,
|
||||
base, BufferAccess.bufferAddress(bb, offset),
|
||||
bb, offset, s,
|
||||
defaultImpl);
|
||||
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
|
||||
msp, offset, s,
|
||||
defaultImpl);
|
||||
} finally {
|
||||
Reference.reachabilityFence(session);
|
||||
}
|
||||
@ -402,17 +362,22 @@ public class ScopedMemoryAccess {
|
||||
public static
|
||||
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
|
||||
M extends VectorSupport.VectorMask<E>>
|
||||
V loadFromByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
|
||||
int length, ByteBuffer bb, int offset, M m, S s,
|
||||
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
|
||||
V loadFromMemorySegmentMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
|
||||
int length, AbstractMemorySegmentImpl msp, long offset, M m, S s,
|
||||
VectorSupport.LoadVectorMaskedOperation<AbstractMemorySegmentImpl, V, S, M> defaultImpl) {
|
||||
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
|
||||
if (msp.maxAlignMask() > 1) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
try {
|
||||
return loadFromByteBufferMaskedScoped(
|
||||
BufferAccess.session(bb),
|
||||
return loadFromMemorySegmentMaskedScopedInternal(
|
||||
msp.sessionImpl(),
|
||||
vmClass, maskClass, e, length,
|
||||
bb, offset, m,
|
||||
msp, offset, m,
|
||||
s,
|
||||
defaultImpl);
|
||||
} catch (ScopedMemoryAccess.ScopedAccessError ex) {
|
||||
} catch (ScopedAccessError ex) {
|
||||
throw new IllegalStateException("This segment is already closed");
|
||||
}
|
||||
}
|
||||
@ -422,19 +387,17 @@ public class ScopedMemoryAccess {
|
||||
private static
|
||||
<V extends VectorSupport.Vector<E>, E, S extends VectorSupport.VectorSpecies<E>,
|
||||
M extends VectorSupport.VectorMask<E>>
|
||||
V loadFromByteBufferMaskedScoped(MemorySessionImpl session, Class<? extends V> vmClass,
|
||||
Class<M> maskClass, Class<E> e, int length,
|
||||
ByteBuffer bb, int offset, M m,
|
||||
S s,
|
||||
VectorSupport.LoadVectorMaskedOperation<ByteBuffer, V, S, M> defaultImpl) {
|
||||
V loadFromMemorySegmentMaskedScopedInternal(MemorySessionImpl session, Class<? extends V> vmClass,
|
||||
Class<M> maskClass, Class<E> e, int length,
|
||||
AbstractMemorySegmentImpl msp, long offset, M m,
|
||||
S s,
|
||||
VectorSupport.LoadVectorMaskedOperation<AbstractMemorySegmentImpl, V, S, M> defaultImpl) {
|
||||
try {
|
||||
if (session != null) {
|
||||
session.checkValidState();
|
||||
}
|
||||
session.checkValidState();
|
||||
|
||||
return VectorSupport.loadMasked(vmClass, maskClass, e, length,
|
||||
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset), m,
|
||||
bb, offset, s,
|
||||
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, m,
|
||||
msp, offset, s,
|
||||
defaultImpl);
|
||||
} finally {
|
||||
Reference.reachabilityFence(session);
|
||||
@ -444,16 +407,21 @@ public class ScopedMemoryAccess {
|
||||
@ForceInline
|
||||
public static
|
||||
<V extends VectorSupport.Vector<E>, E>
|
||||
void storeIntoByteBuffer(Class<? extends V> vmClass, Class<E> e, int length,
|
||||
V v,
|
||||
ByteBuffer bb, int offset,
|
||||
VectorSupport.StoreVectorOperation<ByteBuffer, V> defaultImpl) {
|
||||
void storeIntoMemorySegment(Class<? extends V> vmClass, Class<E> e, int length,
|
||||
V v,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
VectorSupport.StoreVectorOperation<AbstractMemorySegmentImpl, V> defaultImpl) {
|
||||
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
|
||||
if (msp.maxAlignMask() > 1) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
try {
|
||||
storeIntoByteBufferScoped(
|
||||
BufferAccess.session(bb),
|
||||
storeIntoMemorySegmentScopedInternal(
|
||||
msp.sessionImpl(),
|
||||
vmClass, e, length,
|
||||
v,
|
||||
bb, offset,
|
||||
msp, offset,
|
||||
defaultImpl);
|
||||
} catch (ScopedAccessError ex) {
|
||||
throw new IllegalStateException("This segment is already closed");
|
||||
@ -464,23 +432,19 @@ public class ScopedMemoryAccess {
|
||||
@ForceInline
|
||||
private static
|
||||
<V extends VectorSupport.Vector<E>, E>
|
||||
void storeIntoByteBufferScoped(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<E> e, int length,
|
||||
V v,
|
||||
ByteBuffer bb, int offset,
|
||||
VectorSupport.StoreVectorOperation<ByteBuffer, V> defaultImpl) {
|
||||
void storeIntoMemorySegmentScopedInternal(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<E> e, int length,
|
||||
V v,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
VectorSupport.StoreVectorOperation<AbstractMemorySegmentImpl, V> defaultImpl) {
|
||||
try {
|
||||
if (session != null) {
|
||||
session.checkValidState();
|
||||
}
|
||||
|
||||
final byte[] base = (byte[]) BufferAccess.bufferBase(bb);
|
||||
session.checkValidState();
|
||||
|
||||
VectorSupport.store(vmClass, e, length,
|
||||
base, BufferAccess.bufferAddress(bb, offset),
|
||||
v,
|
||||
bb, offset,
|
||||
defaultImpl);
|
||||
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
|
||||
v,
|
||||
msp, offset,
|
||||
defaultImpl);
|
||||
} finally {
|
||||
Reference.reachabilityFence(session);
|
||||
}
|
||||
@ -489,18 +453,23 @@ public class ScopedMemoryAccess {
|
||||
@ForceInline
|
||||
public static
|
||||
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
|
||||
void storeIntoByteBufferMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
|
||||
int length, V v, M m,
|
||||
ByteBuffer bb, int offset,
|
||||
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
|
||||
void storeIntoMemorySegmentMasked(Class<? extends V> vmClass, Class<M> maskClass, Class<E> e,
|
||||
int length, V v, M m,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
VectorSupport.StoreVectorMaskedOperation<AbstractMemorySegmentImpl, V, M> defaultImpl) {
|
||||
// @@@ Smarter alignment checking if accessing heap segment backing non-byte[] array
|
||||
if (msp.maxAlignMask() > 1) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
try {
|
||||
storeIntoByteBufferMaskedScoped(
|
||||
BufferAccess.session(bb),
|
||||
storeIntoMemorySegmentMaskedScopedInternal(
|
||||
msp.sessionImpl(),
|
||||
vmClass, maskClass, e, length,
|
||||
v, m,
|
||||
bb, offset,
|
||||
msp, offset,
|
||||
defaultImpl);
|
||||
} catch (ScopedMemoryAccess.ScopedAccessError ex) {
|
||||
} catch (ScopedAccessError ex) {
|
||||
throw new IllegalStateException("This segment is already closed");
|
||||
}
|
||||
}
|
||||
@ -509,20 +478,18 @@ public class ScopedMemoryAccess {
|
||||
@ForceInline
|
||||
private static
|
||||
<V extends VectorSupport.Vector<E>, E, M extends VectorSupport.VectorMask<E>>
|
||||
void storeIntoByteBufferMaskedScoped(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<M> maskClass,
|
||||
Class<E> e, int length, V v, M m,
|
||||
ByteBuffer bb, int offset,
|
||||
VectorSupport.StoreVectorMaskedOperation<ByteBuffer, V, M> defaultImpl) {
|
||||
void storeIntoMemorySegmentMaskedScopedInternal(MemorySessionImpl session,
|
||||
Class<? extends V> vmClass, Class<M> maskClass,
|
||||
Class<E> e, int length, V v, M m,
|
||||
AbstractMemorySegmentImpl msp, long offset,
|
||||
VectorSupport.StoreVectorMaskedOperation<AbstractMemorySegmentImpl, V, M> defaultImpl) {
|
||||
try {
|
||||
if (session != null) {
|
||||
session.checkValidState();
|
||||
}
|
||||
session.checkValidState();
|
||||
|
||||
VectorSupport.storeMasked(vmClass, maskClass, e, length,
|
||||
BufferAccess.bufferBase(bb), BufferAccess.bufferAddress(bb, offset),
|
||||
msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
|
||||
v, m,
|
||||
bb, offset,
|
||||
msp, offset,
|
||||
defaultImpl);
|
||||
} finally {
|
||||
Reference.reachabilityFence(session);
|
||||
|
||||
@ -41,6 +41,7 @@ public class VectorSupport {
|
||||
public static final int VECTOR_OP_ABS = 0;
|
||||
public static final int VECTOR_OP_NEG = 1;
|
||||
public static final int VECTOR_OP_SQRT = 2;
|
||||
public static final int VECTOR_OP_BIT_COUNT = 3;
|
||||
|
||||
// Binary
|
||||
public static final int VECTOR_OP_ADD = 4;
|
||||
@ -76,6 +77,23 @@ public class VectorSupport {
|
||||
public static final int VECTOR_OP_LROTATE = 24;
|
||||
public static final int VECTOR_OP_RROTATE = 25;
|
||||
|
||||
// Compression expansion operations
|
||||
public static final int VECTOR_OP_COMPRESS = 26;
|
||||
public static final int VECTOR_OP_EXPAND = 27;
|
||||
public static final int VECTOR_OP_MASK_COMPRESS = 28;
|
||||
|
||||
// Leading/Trailing zeros count operations
|
||||
public static final int VECTOR_OP_TZ_COUNT = 29;
|
||||
public static final int VECTOR_OP_LZ_COUNT = 30;
|
||||
|
||||
// Reverse operation
|
||||
public static final int VECTOR_OP_REVERSE = 31;
|
||||
public static final int VECTOR_OP_REVERSE_BYTES = 32;
|
||||
|
||||
// Compress and Expand Bits operation
|
||||
public static final int VECTOR_OP_COMPRESS_BITS = 33;
|
||||
public static final int VECTOR_OP_EXPAND_BITS = 34;
|
||||
|
||||
// Math routines
|
||||
public static final int VECTOR_OP_TAN = 101;
|
||||
public static final int VECTOR_OP_TANH = 102;
|
||||
@ -363,7 +381,7 @@ public class VectorSupport {
|
||||
public interface LoadOperation<C,
|
||||
VM extends VectorPayload,
|
||||
S extends VectorSpecies<?>> {
|
||||
VM load(C container, int index, S s);
|
||||
VM load(C container, long index, S s);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
@ -375,7 +393,7 @@ public class VectorSupport {
|
||||
VM load(Class<? extends VM> vmClass, Class<E> eClass,
|
||||
int length,
|
||||
Object base, long offset,
|
||||
C container, int index, S s,
|
||||
C container, long index, S s,
|
||||
LoadOperation<C, VM, S> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.load(container, index, s);
|
||||
@ -387,7 +405,7 @@ public class VectorSupport {
|
||||
V extends Vector<?>,
|
||||
S extends VectorSpecies<?>,
|
||||
M extends VectorMask<?>> {
|
||||
V load(C container, int index, S s, M m);
|
||||
V load(C container, long index, S s, M m);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
@ -400,7 +418,7 @@ public class VectorSupport {
|
||||
V loadMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
|
||||
int length,
|
||||
Object base, long offset,
|
||||
M m, C container, int index, S s,
|
||||
M m, C container, long index, S s,
|
||||
LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.load(container, index, s, m);
|
||||
@ -437,18 +455,18 @@ public class VectorSupport {
|
||||
/* ============================================================================ */
|
||||
|
||||
public interface StoreVectorOperation<C,
|
||||
V extends Vector<?>> {
|
||||
void store(C container, int index, V v);
|
||||
V extends VectorPayload> {
|
||||
void store(C container, long index, V v);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<C,
|
||||
V extends Vector<?>>
|
||||
V extends VectorPayload>
|
||||
void store(Class<?> vClass, Class<?> eClass,
|
||||
int length,
|
||||
Object base, long offset,
|
||||
V v, C container, int index,
|
||||
V v, C container, long index,
|
||||
StoreVectorOperation<C, V> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
defaultImpl.store(container, index, v);
|
||||
@ -457,7 +475,7 @@ public class VectorSupport {
|
||||
public interface StoreVectorMaskedOperation<C,
|
||||
V extends Vector<?>,
|
||||
M extends VectorMask<?>> {
|
||||
void store(C container, int index, V v, M m);
|
||||
void store(C container, long index, V v, M m);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
@ -469,7 +487,7 @@ public class VectorSupport {
|
||||
void storeMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
|
||||
int length,
|
||||
Object base, long offset,
|
||||
V v, M m, C container, int index,
|
||||
V v, M m, C container, long index,
|
||||
StoreVectorMaskedOperation<C, V, M> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
defaultImpl.store(container, index, v, m);
|
||||
@ -626,6 +644,26 @@ public class VectorSupport {
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
public interface CompressExpandOperation<V extends Vector<?>,
|
||||
M extends VectorMask<?>> {
|
||||
VectorPayload apply(V v, M m);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<V extends Vector<E>,
|
||||
M extends VectorMask<E>,
|
||||
E>
|
||||
VectorPayload compressExpandOp(int opr,
|
||||
Class<? extends V> vClass, Class<? extends M> mClass, Class<E> eClass,
|
||||
int length, V v, M m,
|
||||
CompressExpandOperation<V, M> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.apply(v, m);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<VP extends VectorPayload>
|
||||
|
||||
@ -158,6 +158,8 @@ module java.base {
|
||||
jdk.jartool,
|
||||
jdk.jlink,
|
||||
jdk.net;
|
||||
exports jdk.internal.foreign to
|
||||
jdk.incubator.vector;
|
||||
exports jdk.internal.event to
|
||||
jdk.jfr;
|
||||
exports jdk.internal.jimage to
|
||||
|
||||
@ -38,6 +38,7 @@ import com.sun.tools.javac.util.JCDiagnostic.SimpleDiagnosticPosition;
|
||||
import com.sun.tools.javac.util.JCDiagnostic.Warning;
|
||||
import com.sun.tools.javac.util.Log;
|
||||
import com.sun.tools.javac.util.MandatoryWarningHandler;
|
||||
import com.sun.tools.javac.util.Names;
|
||||
import com.sun.tools.javac.util.Options;
|
||||
|
||||
import javax.tools.JavaFileObject;
|
||||
@ -78,6 +79,7 @@ public class Preview {
|
||||
|
||||
private final Set<JavaFileObject> sourcesWithPreviewFeatures = new HashSet<>();
|
||||
|
||||
private final Names names;
|
||||
private final Lint lint;
|
||||
private final Log log;
|
||||
private final Source source;
|
||||
@ -95,6 +97,7 @@ public class Preview {
|
||||
Preview(Context context) {
|
||||
context.put(previewKey, this);
|
||||
Options options = Options.instance(context);
|
||||
names = Names.instance(context);
|
||||
enabled = options.isSet(PREVIEW);
|
||||
log = Log.instance(context);
|
||||
lint = Lint.instance(context);
|
||||
@ -115,7 +118,22 @@ public class Preview {
|
||||
}
|
||||
}
|
||||
return majorVersionToSource;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if {@code s} is deemed to participate in the preview of {@code previewSymbol}, and
|
||||
* therefore no warnings or errors will be produced.
|
||||
*
|
||||
* @param s the symbol depending on the preview symbol
|
||||
* @param previewSymbol the preview symbol marked with @Preview
|
||||
* @return true if {@code s} is participating in the preview of {@code previewSymbol}
|
||||
*/
|
||||
public boolean participatesInPreview(Symbol s, Symbol previewSymbol) {
|
||||
// Hardcode the incubating vector API module for now
|
||||
// Will generalize with an annotation, @PreviewParticipating say, later
|
||||
return previewSymbol.packge().modle == s.packge().modle ||
|
||||
s.packge().modle.name == names.jdk_incubator_vector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Report usage of a preview feature. Usages reported through this method will affect the
|
||||
|
||||
@ -3600,7 +3600,7 @@ public class Check {
|
||||
}
|
||||
|
||||
void checkPreview(DiagnosticPosition pos, Symbol other, Symbol s) {
|
||||
if ((s.flags() & PREVIEW_API) != 0 && s.packge().modle != other.packge().modle) {
|
||||
if ((s.flags() & PREVIEW_API) != 0 && !preview.participatesInPreview(other, s)) {
|
||||
if ((s.flags() & PREVIEW_REFLECTIVE) == 0) {
|
||||
if (!preview.isEnabled()) {
|
||||
log.error(pos, Errors.IsPreview(s));
|
||||
|
||||
@ -124,6 +124,7 @@ public class Names {
|
||||
// module names
|
||||
public final Name java_base;
|
||||
public final Name jdk_unsupported;
|
||||
public final Name jdk_incubator_vector;
|
||||
|
||||
// attribute names
|
||||
public final Name Annotation;
|
||||
@ -305,6 +306,7 @@ public class Names {
|
||||
// module names
|
||||
java_base = fromString("java.base");
|
||||
jdk_unsupported = fromString("jdk.unsupported");
|
||||
jdk_incubator_vector = fromString("jdk.incubator.vector");
|
||||
|
||||
// attribute names
|
||||
Annotation = fromString("Annotation");
|
||||
|
||||
@ -28,6 +28,10 @@ import java.util.Objects;
|
||||
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
import jdk.internal.misc.Unsafe;
|
||||
|
||||
import jdk.internal.vm.vector.VectorSupport;
|
||||
|
||||
import static jdk.incubator.vector.VectorOperators.*;
|
||||
|
||||
abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
@ -77,7 +81,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
|
||||
@Override
|
||||
public void intoArray(boolean[] bits, int i) {
|
||||
System.arraycopy(getBits(), 0, bits, i, length());
|
||||
AbstractSpecies<E> vsp = (AbstractSpecies<E>) vectorSpecies();
|
||||
int laneCount = vsp.laneCount();
|
||||
i = VectorIntrinsics.checkFromIndexSize(i, laneCount, bits.length);
|
||||
VectorSupport.store(
|
||||
vsp.maskType(), vsp.elementType(), laneCount,
|
||||
bits, (long) i + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
|
||||
this, bits, i,
|
||||
(c, idx, s) -> System.arraycopy(s.getBits(), 0, c, (int) idx, s.length()));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -192,6 +204,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
return this.andNot(badMask);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public VectorMask<E> indexInRange(long offset, long limit) {
|
||||
int vlength = length();
|
||||
Vector<E> iota = vectorSpecies().zero().addIndex(1);
|
||||
VectorMask<E> badMask = checkIndex0(offset, limit, iota, vlength);
|
||||
return this.andNot(badMask);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
AbstractVector<E>
|
||||
@ -215,7 +236,7 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
*/
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void checkIndexByLane(int offset, int alength,
|
||||
void checkIndexByLane(int offset, int length,
|
||||
Vector<E> iota,
|
||||
int esize) {
|
||||
if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
|
||||
@ -229,15 +250,15 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
int vlength = length();
|
||||
VectorMask<E> badMask;
|
||||
if (esize == 1) {
|
||||
badMask = checkIndex0(offset, alength, iota, vlength);
|
||||
badMask = checkIndex0(offset, length, iota, vlength);
|
||||
} else if (offset >= 0) {
|
||||
// Masked access to multi-byte lanes in byte array.
|
||||
// It could be aligned anywhere.
|
||||
int elemCount = Math.min(vlength, (alength - offset) / esize);
|
||||
int elemCount = Math.min(vlength, (length - offset) / esize);
|
||||
badMask = checkIndex0(0, elemCount, iota, vlength);
|
||||
} else {
|
||||
int clipOffset = Math.max(offset, -(vlength * esize));
|
||||
badMask = checkIndex0(clipOffset, alength,
|
||||
badMask = checkIndex0(clipOffset, length,
|
||||
iota.lanewise(VectorOperators.MUL, esize),
|
||||
vlength * esize);
|
||||
}
|
||||
@ -245,20 +266,20 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
if (badMask.anyTrue()) {
|
||||
int badLane = badMask.firstTrue();
|
||||
throw ((AbstractMask<E>)badMask)
|
||||
.checkIndexFailed(offset, badLane, alength, esize);
|
||||
.checkIndexFailed(offset, badLane, length, esize);
|
||||
}
|
||||
}
|
||||
|
||||
private
|
||||
@ForceInline
|
||||
VectorMask<E> checkIndex0(int offset, int alength,
|
||||
VectorMask<E> checkIndex0(int offset, int length,
|
||||
Vector<E> iota, int vlength) {
|
||||
// An active lane is bad if its number is greater than
|
||||
// alength-offset, since when added to offset it will step off
|
||||
// length-offset, since when added to offset it will step off
|
||||
// of the end of the array. To avoid overflow when
|
||||
// converting, clip the comparison value to [0..vlength]
|
||||
// inclusive.
|
||||
int indexLimit = Math.max(0, Math.min(alength - offset, vlength));
|
||||
int indexLimit = Math.max(0, Math.min(length - offset, vlength));
|
||||
VectorMask<E> badMask =
|
||||
iota.compare(GE, iota.broadcast(indexLimit));
|
||||
if (offset < 0) {
|
||||
@ -280,14 +301,90 @@ abstract class AbstractMask<E> extends VectorMask<E> {
|
||||
return badMask;
|
||||
}
|
||||
|
||||
private IndexOutOfBoundsException checkIndexFailed(int offset, int lane,
|
||||
int alength, int esize) {
|
||||
/**
|
||||
* Test if a masked memory access at a given offset into an array
|
||||
* of the given length will stay within the array.
|
||||
* The per-lane offsets are iota*esize.
|
||||
*/
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void checkIndexByLane(long offset, long length,
|
||||
Vector<E> iota,
|
||||
int esize) {
|
||||
if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
|
||||
return;
|
||||
}
|
||||
// Although the specification is simple, the implementation is
|
||||
// tricky, because the value iota*esize might possibly
|
||||
// overflow. So we calculate our test values as scalars,
|
||||
// clipping to the range [-1..VLENGTH], and test them against
|
||||
// the unscaled iota vector, whose values are in [0..VLENGTH-1].
|
||||
int vlength = length();
|
||||
VectorMask<E> badMask;
|
||||
if (esize == 1) {
|
||||
badMask = checkIndex0(offset, length, iota, vlength);
|
||||
} else if (offset >= 0) {
|
||||
// Masked access to multi-byte lanes in byte array.
|
||||
// It could be aligned anywhere.
|
||||
// 0 <= elemCount <= vlength
|
||||
int elemCount = (int) Math.min(vlength, (length - offset) / esize);
|
||||
badMask = checkIndex0(0, elemCount, iota, vlength);
|
||||
} else {
|
||||
// -vlength * esize <= clipOffset <= 0
|
||||
int clipOffset = (int) Math.max(offset, -(vlength * esize));
|
||||
badMask = checkIndex0(clipOffset, length,
|
||||
iota.lanewise(VectorOperators.MUL, esize),
|
||||
vlength * esize);
|
||||
}
|
||||
badMask = badMask.and(this);
|
||||
if (badMask.anyTrue()) {
|
||||
int badLane = badMask.firstTrue();
|
||||
throw ((AbstractMask<E>)badMask)
|
||||
.checkIndexFailed(offset, badLane, length, esize);
|
||||
}
|
||||
}
|
||||
|
||||
private
|
||||
@ForceInline
|
||||
VectorMask<E> checkIndex0(long offset, long length,
|
||||
Vector<E> iota, int vlength) {
|
||||
// An active lane is bad if its number is greater than
|
||||
// length-offset, since when added to offset it will step off
|
||||
// of the end of the array. To avoid overflow when
|
||||
// converting, clip the comparison value to [0..vlength]
|
||||
// inclusive.
|
||||
// 0 <= indexLimit <= vlength
|
||||
int indexLimit = (int) Math.max(0, Math.min(length - offset, vlength));
|
||||
VectorMask<E> badMask =
|
||||
iota.compare(GE, iota.broadcast(indexLimit));
|
||||
if (offset < 0) {
|
||||
// An active lane is bad if its number is less than
|
||||
// -offset, because when added to offset it will then
|
||||
// address an array element at a negative index. To avoid
|
||||
// overflow when converting, clip the comparison value at
|
||||
// vlength. This specific expression works correctly even
|
||||
// when offset is Integer.MIN_VALUE.
|
||||
// 0 <= firstGoodIndex <= vlength
|
||||
int firstGoodIndex = (int) -Math.max(offset, -vlength);
|
||||
VectorMask<E> badMask2 =
|
||||
iota.compare(LT, iota.broadcast(firstGoodIndex));
|
||||
if (indexLimit >= vlength) {
|
||||
badMask = badMask2; // 1st badMask is all true
|
||||
} else {
|
||||
badMask = badMask.or(badMask2);
|
||||
}
|
||||
}
|
||||
return badMask;
|
||||
}
|
||||
|
||||
private IndexOutOfBoundsException checkIndexFailed(long offset, int lane,
|
||||
long length, int esize) {
|
||||
String msg = String.format("Masked range check failed: "+
|
||||
"vector mask %s out of bounds at "+
|
||||
"index %d+%d in array of length %d",
|
||||
this, offset, lane * esize, alength);
|
||||
"index %d+%d for length %d",
|
||||
this, offset, lane * esize, length);
|
||||
if (esize != 1) {
|
||||
msg += String.format(" (each lane spans %d array elements)", esize);
|
||||
msg += String.format(" (each lane spans %d elements)", esize);
|
||||
}
|
||||
throw new IndexOutOfBoundsException(msg);
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -24,10 +24,11 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import jdk.internal.vm.annotation.Stable;
|
||||
import java.nio.ByteOrder;
|
||||
import java.lang.reflect.Array;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Arrays;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -203,12 +204,24 @@ abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.V
|
||||
return VectorIntrinsics.roundDown(length, laneCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final long loopBound(long length) {
|
||||
return VectorIntrinsics.roundDown(length, laneCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final VectorMask<E> indexInRange(int offset, int limit) {
|
||||
return maskAll(true).indexInRange(offset, limit);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final VectorMask<E> indexInRange(long offset, long limit) {
|
||||
return maskAll(true).indexInRange(offset, limit);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final <F> VectorSpecies<F> withLanes(Class<F> newType) {
|
||||
@ -349,9 +362,9 @@ abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.V
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo) {
|
||||
public final Vector<E> fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo) {
|
||||
return dummyVector()
|
||||
.fromByteArray0(a, offset)
|
||||
.fromMemorySegment0(ms, offset)
|
||||
.maybeSwap(bo);
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -24,10 +24,10 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import jdk.internal.vm.vector.VectorSupport;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
@ -194,7 +194,7 @@ abstract class AbstractVector<E> extends Vector<E> {
|
||||
abstract AbstractShuffle<E> shuffleFromOp(IntUnaryOperator fn);
|
||||
|
||||
/*package-private*/
|
||||
abstract AbstractVector<E> fromByteArray0(byte[] a, int offset);
|
||||
abstract AbstractVector<E> fromMemorySegment0(MemorySegment ms, long offset);
|
||||
|
||||
/*package-private*/
|
||||
abstract AbstractVector<E> maybeSwap(ByteOrder bo);
|
||||
@ -504,23 +504,23 @@ abstract class AbstractVector<E> extends Vector<E> {
|
||||
AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
|
||||
int blen = Math.max(this.bitSize(), rsp.vectorBitSize()) / Byte.SIZE;
|
||||
ByteOrder bo = ByteOrder.nativeOrder();
|
||||
ByteBuffer bb = ByteBuffer.allocate(blen);
|
||||
this.intoByteBuffer(bb, 0, bo);
|
||||
MemorySegment ms = MemorySegment.ofArray(new byte[blen]);
|
||||
this.intoMemorySegment(ms, 0, bo);
|
||||
VectorMask<F> m = rsp.maskAll(true);
|
||||
// enum-switches don't optimize properly JDK-8161245
|
||||
switch (rsp.laneType.switchKey) {
|
||||
case LaneType.SK_BYTE:
|
||||
return ByteVector.fromByteBuffer(rsp.check(byte.class), bb, 0, bo, m.check(byte.class)).check0(rsp);
|
||||
return ByteVector.fromMemorySegment(rsp.check(byte.class), ms, 0, bo, m.check(byte.class)).check0(rsp);
|
||||
case LaneType.SK_SHORT:
|
||||
return ShortVector.fromByteBuffer(rsp.check(short.class), bb, 0, bo, m.check(short.class)).check0(rsp);
|
||||
return ShortVector.fromMemorySegment(rsp.check(short.class), ms, 0, bo, m.check(short.class)).check0(rsp);
|
||||
case LaneType.SK_INT:
|
||||
return IntVector.fromByteBuffer(rsp.check(int.class), bb, 0, bo, m.check(int.class)).check0(rsp);
|
||||
return IntVector.fromMemorySegment(rsp.check(int.class), ms, 0, bo, m.check(int.class)).check0(rsp);
|
||||
case LaneType.SK_LONG:
|
||||
return LongVector.fromByteBuffer(rsp.check(long.class), bb, 0, bo, m.check(long.class)).check0(rsp);
|
||||
return LongVector.fromMemorySegment(rsp.check(long.class), ms, 0, bo, m.check(long.class)).check0(rsp);
|
||||
case LaneType.SK_FLOAT:
|
||||
return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp);
|
||||
return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp);
|
||||
case LaneType.SK_DOUBLE:
|
||||
return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp);
|
||||
return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp);
|
||||
default:
|
||||
throw new AssertionError(rsp.toString());
|
||||
}
|
||||
@ -730,15 +730,6 @@ abstract class AbstractVector<E> extends Vector<E> {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
// Byte buffer wrappers.
|
||||
static ByteBuffer wrapper(ByteBuffer bb, ByteOrder bo) {
|
||||
return bb.duplicate().order(bo);
|
||||
}
|
||||
|
||||
static ByteBuffer wrapper(byte[] a, ByteOrder bo) {
|
||||
return ByteBuffer.wrap(a).order(bo);
|
||||
}
|
||||
|
||||
static {
|
||||
// Recode uses of VectorSupport.reinterpret if this assertion fails:
|
||||
assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Byte128Vector extends ByteVector {
|
||||
(Byte128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte128Vector compress(VectorMask<Byte> m) {
|
||||
return (Byte128Vector)
|
||||
super.compressTemplate(Byte128Mask.class,
|
||||
(Byte128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte128Vector expand(VectorMask<Byte> m) {
|
||||
return (Byte128Vector)
|
||||
super.expandTemplate(Byte128Mask.class,
|
||||
(Byte128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte128Vector selectFrom(Vector<Byte> v) {
|
||||
@ -677,6 +693,15 @@ final class Byte128Vector extends ByteVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte128Mask compress() {
|
||||
return (Byte128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Byte128Vector.class, Byte128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -876,29 +901,15 @@ final class Byte128Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
return super.fromMemorySegment0Template(Byte128Mask.class, ms, offset, (Byte128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -926,22 +937,8 @@ final class Byte128Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteArray0Template(Byte128Mask.class, a, offset, (Byte128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteBuffer0Template(Byte128Mask.class, bb, offset, (Byte128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
super.intoMemorySegment0Template(Byte128Mask.class, ms, offset, (Byte128Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -950,3 +947,4 @@ final class Byte128Vector extends ByteVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Byte256Vector extends ByteVector {
|
||||
(Byte256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Vector compress(VectorMask<Byte> m) {
|
||||
return (Byte256Vector)
|
||||
super.compressTemplate(Byte256Mask.class,
|
||||
(Byte256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Vector expand(VectorMask<Byte> m) {
|
||||
return (Byte256Vector)
|
||||
super.expandTemplate(Byte256Mask.class,
|
||||
(Byte256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Vector selectFrom(Vector<Byte> v) {
|
||||
@ -709,6 +725,15 @@ final class Byte256Vector extends ByteVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Mask compress() {
|
||||
return (Byte256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Byte256Vector.class, Byte256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -908,29 +933,15 @@ final class Byte256Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
return super.fromMemorySegment0Template(Byte256Mask.class, ms, offset, (Byte256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -958,22 +969,8 @@ final class Byte256Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteArray0Template(Byte256Mask.class, a, offset, (Byte256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteBuffer0Template(Byte256Mask.class, bb, offset, (Byte256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
super.intoMemorySegment0Template(Byte256Mask.class, ms, offset, (Byte256Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -982,3 +979,4 @@ final class Byte256Vector extends ByteVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Byte512Vector extends ByteVector {
|
||||
(Byte512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte512Vector compress(VectorMask<Byte> m) {
|
||||
return (Byte512Vector)
|
||||
super.compressTemplate(Byte512Mask.class,
|
||||
(Byte512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte512Vector expand(VectorMask<Byte> m) {
|
||||
return (Byte512Vector)
|
||||
super.expandTemplate(Byte512Mask.class,
|
||||
(Byte512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte512Vector selectFrom(Vector<Byte> v) {
|
||||
@ -773,6 +789,15 @@ final class Byte512Vector extends ByteVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte512Mask compress() {
|
||||
return (Byte512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Byte512Vector.class, Byte512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -972,29 +997,15 @@ final class Byte512Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
return super.fromMemorySegment0Template(Byte512Mask.class, ms, offset, (Byte512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -1022,22 +1033,8 @@ final class Byte512Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteArray0Template(Byte512Mask.class, a, offset, (Byte512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteBuffer0Template(Byte512Mask.class, bb, offset, (Byte512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
super.intoMemorySegment0Template(Byte512Mask.class, ms, offset, (Byte512Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -1046,3 +1043,4 @@ final class Byte512Vector extends ByteVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Byte64Vector extends ByteVector {
|
||||
(Byte64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte64Vector compress(VectorMask<Byte> m) {
|
||||
return (Byte64Vector)
|
||||
super.compressTemplate(Byte64Mask.class,
|
||||
(Byte64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte64Vector expand(VectorMask<Byte> m) {
|
||||
return (Byte64Vector)
|
||||
super.expandTemplate(Byte64Mask.class,
|
||||
(Byte64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte64Vector selectFrom(Vector<Byte> v) {
|
||||
@ -661,6 +677,15 @@ final class Byte64Vector extends ByteVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte64Mask compress() {
|
||||
return (Byte64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Byte64Vector.class, Byte64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -860,29 +885,15 @@ final class Byte64Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
return super.fromMemorySegment0Template(Byte64Mask.class, ms, offset, (Byte64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -910,22 +921,8 @@ final class Byte64Vector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteArray0Template(Byte64Mask.class, a, offset, (Byte64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteBuffer0Template(Byte64Mask.class, bb, offset, (Byte64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
super.intoMemorySegment0Template(Byte64Mask.class, ms, offset, (Byte64Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -934,3 +931,4 @@ final class Byte64Vector extends ByteVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class ByteMaxVector extends ByteVector {
|
||||
(ByteMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ByteMaxVector compress(VectorMask<Byte> m) {
|
||||
return (ByteMaxVector)
|
||||
super.compressTemplate(ByteMaxMask.class,
|
||||
(ByteMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ByteMaxVector expand(VectorMask<Byte> m) {
|
||||
return (ByteMaxVector)
|
||||
super.expandTemplate(ByteMaxMask.class,
|
||||
(ByteMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ByteMaxVector selectFrom(Vector<Byte> v) {
|
||||
@ -647,6 +663,15 @@ final class ByteMaxVector extends ByteVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ByteMaxMask compress() {
|
||||
return (ByteMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
ByteMaxVector.class, ByteMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -846,29 +871,15 @@ final class ByteMaxVector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
return super.fromByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m); // specialize
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
return super.fromMemorySegment0Template(ByteMaxMask.class, ms, offset, (ByteMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -896,22 +907,8 @@ final class ByteMaxVector extends ByteVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteArray0Template(ByteMaxMask.class, a, offset, (ByteMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m) {
|
||||
super.intoByteBuffer0Template(ByteMaxMask.class, bb, offset, (ByteMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m) {
|
||||
super.intoMemorySegment0Template(ByteMaxMask.class, ms, offset, (ByteMaxMask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -920,3 +917,4 @@ final class ByteMaxVector extends ByteVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
|
||||
|
||||
static final ValueLayout.OfByte ELEMENT_LAYOUT = ValueLayout.JAVA_BYTE.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
// Per-lane load callback for MemorySegment sources addressed by a long offset.
// ldLongOp invokes apply once per lane and stores the returned byte in that lane.
interface FLdLongOp {
|
||||
// memory and offset are passed through from ldLongOp's caller; i is the lane index.
byte apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
ByteVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
byte[] res = new byte[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
ByteVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Byte> m,
|
||||
FLdLongOp f) {
|
||||
//byte[] vec = vec();
|
||||
byte[] res = new byte[length()];
|
||||
boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static byte memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * 1L);
|
||||
}
|
||||
|
||||
// Per-lane store callback for a memory container of type M addressed by an int offset.
interface FStOp<M> {
|
||||
// i is the lane index and a is the lane value to store.
void apply(M memory, int offset, int i, byte a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
}
|
||||
}
|
||||
|
||||
// Per-lane store callback for MemorySegment targets addressed by a long offset.
// stLongOp invokes apply once per (selected) lane.
interface FStLongOp {
|
||||
// memory and offset are passed through from stLongOp's caller; i is the lane index, a the lane value.
void apply(MemorySegment memory, long offset, int i, byte a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
byte[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Byte> m,
|
||||
FStLongOp f) {
|
||||
byte[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * 1L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -431,6 +506,36 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
return ((byte)bits);
|
||||
}
|
||||
|
||||
static ByteVector expandHelper(Vector<Byte> v, VectorMask<Byte> m) {
|
||||
VectorSpecies<Byte> vsp = m.vectorSpecies();
|
||||
ByteVector r = (ByteVector) vsp.zero();
|
||||
ByteVector vi = (ByteVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static ByteVector compressHelper(Vector<Byte> v, VectorMask<Byte> m) {
|
||||
VectorSpecies<Byte> vsp = m.vectorSpecies();
|
||||
ByteVector r = (ByteVector) vsp.zero();
|
||||
ByteVector vi = (ByteVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -620,6 +725,16 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
v0.uOp(m, (i, a) -> (byte) -a);
|
||||
case VECTOR_OP_ABS: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (byte) Math.abs(a));
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (byte) bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (byte) numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (byte) numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> reverse(a));
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> a);
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -1746,6 +1861,25 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
return lanewise(ABS);
|
||||
}
|
||||
|
||||
static int bitCount(byte a) {
|
||||
return Integer.bitCount((int)a & 0xFF);
|
||||
}
|
||||
static int numberOfTrailingZeros(byte a) {
|
||||
return a != 0 ? Integer.numberOfTrailingZeros(a) : 8;
|
||||
}
|
||||
static int numberOfLeadingZeros(byte a) {
|
||||
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 24 : 0;
|
||||
}
|
||||
|
||||
static byte reverse(byte a) {
|
||||
if (a == 0 || a == -1) return a;
|
||||
|
||||
byte b = rotateLeft(a, 4);
|
||||
b = (byte) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
|
||||
b = (byte) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
|
||||
return b;
|
||||
}
|
||||
|
||||
// not (~)
|
||||
/**
|
||||
* Computes the bitwise logical complement ({@code ~})
|
||||
@ -2372,6 +2506,45 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
ByteVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
ByteVector compress(VectorMask<Byte> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Byte>>
|
||||
ByteVector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (ByteVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
byte.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
ByteVector expand(VectorMask<Byte> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Byte>>
|
||||
ByteVector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (ByteVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
byte.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2784,90 +2957,6 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ByteVector fromByteArray(VectorSpecies<Byte> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
ByteSpecies vsp = (ByteSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code byte} (zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ByteVector fromByteArray(VectorSpecies<Byte> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
ByteSpecies vsp = (ByteSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code byte[]}
|
||||
* starting at an offset.
|
||||
@ -3174,44 +3263,49 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*1 < 0}
|
||||
* or {@code offset+N*1 >= bb.limit()}
|
||||
* or {@code offset+N*1 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ByteVector fromByteBuffer(VectorSpecies<Byte> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
ByteVector fromMemorySegment(VectorSpecies<Byte> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
ByteSpecies vsp = (ByteSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code byte} (zero).
|
||||
@ -3222,12 +3316,11 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* ByteBuffer eb = bb.duplicate()
|
||||
* .position(offset);
|
||||
* var slice = ms.asSlice(offset);
|
||||
* byte[] ar = new byte[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_BYTE.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* ByteVector r = ByteVector.fromArray(species, ar, 0);
|
||||
@ -3236,33 +3329,36 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
* The byte order argument is ignored.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*1 < 0}
|
||||
* or {@code offset+N*1 >= bb.limit()}
|
||||
* or {@code offset+N*1 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ByteVector fromByteBuffer(VectorSpecies<Byte> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
ByteVector fromMemorySegment(VectorSpecies<Byte> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
ByteSpecies vsp = (ByteSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Byte>)m,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, ByteVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3292,7 +3388,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3443,7 +3539,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
normalized,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
|
||||
}
|
||||
|
||||
@ -3581,67 +3677,40 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
ByteSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Byte> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
ByteSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 1, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3675,7 +3744,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3692,7 +3761,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3709,7 +3778,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, booleanArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
|
||||
}
|
||||
|
||||
@ -3726,78 +3795,37 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, booleanArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
|
||||
}
|
||||
|
||||
@Override
|
||||
abstract
|
||||
ByteVector fromByteArray0(byte[] a, int offset);
|
||||
ByteVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
ByteVector fromByteArray0Template(byte[] a, int offset) {
|
||||
ByteVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ByteVector fromByteArray0(byte[] a, int offset, VectorMask<Byte> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Byte>>
|
||||
ByteVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, ByteVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
|
||||
ByteVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Byte> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Byte>>
|
||||
ByteVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
ByteVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.get(o + i * 1));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3816,7 +3844,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3833,7 +3861,7 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3852,75 +3880,37 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
a, booleanArrayAddress(a, offset),
|
||||
normalized, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.put(o + i * 1, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Byte> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Byte>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.put(o + i * 1, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.put(o + i * 1, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, ByteVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Byte> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Byte> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Byte>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
ByteSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.put(o + i * 1, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, ByteVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3937,6 +3927,16 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
ByteSpecies vsp,
|
||||
VectorMask<Byte> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Byte>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
ByteSpecies vsp,
|
||||
@ -4256,6 +4256,21 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
ByteVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
ByteVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Byte> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -4270,6 +4285,20 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Byte> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -4383,3 +4412,4 @@ public abstract class ByteVector extends AbstractVector<Byte> {
|
||||
public static final VectorSpecies<Byte> SPECIES_PREFERRED
|
||||
= (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Double128Vector extends DoubleVector {
|
||||
(Double128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Vector compress(VectorMask<Double> m) {
|
||||
return (Double128Vector)
|
||||
super.compressTemplate(Double128Mask.class,
|
||||
(Double128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Vector expand(VectorMask<Double> m) {
|
||||
return (Double128Vector)
|
||||
super.expandTemplate(Double128Mask.class,
|
||||
(Double128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Vector selectFrom(Vector<Double> v) {
|
||||
@ -638,6 +654,15 @@ final class Double128Vector extends DoubleVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Mask compress() {
|
||||
return (Double128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Double128Vector.class, Double128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -830,29 +855,15 @@ final class Double128Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
return super.fromMemorySegment0Template(Double128Mask.class, ms, offset, (Double128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -880,22 +891,8 @@ final class Double128Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
super.intoByteArray0Template(Double128Mask.class, a, offset, (Double128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
super.intoByteBuffer0Template(Double128Mask.class, bb, offset, (Double128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
super.intoMemorySegment0Template(Double128Mask.class, ms, offset, (Double128Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -904,3 +901,4 @@ final class Double128Vector extends DoubleVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Double256Vector extends DoubleVector {
|
||||
(Double256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double256Vector compress(VectorMask<Double> m) {
|
||||
return (Double256Vector)
|
||||
super.compressTemplate(Double256Mask.class,
|
||||
(Double256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double256Vector expand(VectorMask<Double> m) {
|
||||
return (Double256Vector)
|
||||
super.expandTemplate(Double256Mask.class,
|
||||
(Double256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double256Vector selectFrom(Vector<Double> v) {
|
||||
@ -642,6 +658,15 @@ final class Double256Vector extends DoubleVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double256Mask compress() {
|
||||
return (Double256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Double256Vector.class, Double256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -834,29 +859,15 @@ final class Double256Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
return super.fromMemorySegment0Template(Double256Mask.class, ms, offset, (Double256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -884,22 +895,8 @@ final class Double256Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
super.intoByteArray0Template(Double256Mask.class, a, offset, (Double256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
super.intoByteBuffer0Template(Double256Mask.class, bb, offset, (Double256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
super.intoMemorySegment0Template(Double256Mask.class, ms, offset, (Double256Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -908,3 +905,4 @@ final class Double256Vector extends DoubleVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Double512Vector extends DoubleVector {
|
||||
(Double512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector compress(VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.compressTemplate(Double512Mask.class,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector expand(VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.expandTemplate(Double512Mask.class,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector selectFrom(Vector<Double> v) {
|
||||
@ -650,6 +666,15 @@ final class Double512Vector extends DoubleVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Mask compress() {
|
||||
return (Double512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Double512Vector.class, Double512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -842,29 +867,15 @@ final class Double512Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
return super.fromMemorySegment0Template(Double512Mask.class, ms, offset, (Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -892,22 +903,8 @@ final class Double512Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
super.intoByteArray0Template(Double512Mask.class, a, offset, (Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
super.intoByteBuffer0Template(Double512Mask.class, bb, offset, (Double512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
super.intoMemorySegment0Template(Double512Mask.class, ms, offset, (Double512Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -916,3 +913,4 @@ final class Double512Vector extends DoubleVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Double64Vector extends DoubleVector {
|
||||
(Double64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double64Vector compress(VectorMask<Double> m) {
|
||||
return (Double64Vector)
|
||||
super.compressTemplate(Double64Mask.class,
|
||||
(Double64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double64Vector expand(VectorMask<Double> m) {
|
||||
return (Double64Vector)
|
||||
super.expandTemplate(Double64Mask.class,
|
||||
(Double64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double64Vector selectFrom(Vector<Double> v) {
|
||||
@ -636,6 +652,15 @@ final class Double64Vector extends DoubleVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double64Mask compress() {
|
||||
return (Double64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Double64Vector.class, Double64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -828,29 +853,15 @@ final class Double64Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
return super.fromMemorySegment0Template(Double64Mask.class, ms, offset, (Double64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -878,22 +889,8 @@ final class Double64Vector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
super.intoByteArray0Template(Double64Mask.class, a, offset, (Double64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
super.intoByteBuffer0Template(Double64Mask.class, bb, offset, (Double64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
super.intoMemorySegment0Template(Double64Mask.class, ms, offset, (Double64Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -902,3 +899,4 @@ final class Double64Vector extends DoubleVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class DoubleMaxVector extends DoubleVector {
|
||||
(DoubleMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public DoubleMaxVector compress(VectorMask<Double> m) {
|
||||
return (DoubleMaxVector)
|
||||
super.compressTemplate(DoubleMaxMask.class,
|
||||
(DoubleMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public DoubleMaxVector expand(VectorMask<Double> m) {
|
||||
return (DoubleMaxVector)
|
||||
super.expandTemplate(DoubleMaxMask.class,
|
||||
(DoubleMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public DoubleMaxVector selectFrom(Vector<Double> v) {
|
||||
@ -635,6 +651,15 @@ final class DoubleMaxVector extends DoubleVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public DoubleMaxMask compress() {
|
||||
return (DoubleMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
DoubleMaxVector.class, DoubleMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -827,29 +852,15 @@ final class DoubleMaxVector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
return super.fromByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m); // specialize
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
return super.fromMemorySegment0Template(DoubleMaxMask.class, ms, offset, (DoubleMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -877,22 +888,8 @@ final class DoubleMaxVector extends DoubleVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m) {
|
||||
super.intoByteArray0Template(DoubleMaxMask.class, a, offset, (DoubleMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m) {
|
||||
super.intoByteBuffer0Template(DoubleMaxMask.class, bb, offset, (DoubleMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m) {
|
||||
super.intoMemorySegment0Template(DoubleMaxMask.class, ms, offset, (DoubleMaxMask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -901,3 +898,4 @@ final class DoubleMaxVector extends DoubleVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_NOFP;
|
||||
|
||||
// Layout used by memorySegmentGet/memorySegmentSet for per-lane segment
// access: JAVA_DOUBLE with its bit alignment set to 8 (i.e. one byte).
static final ValueLayout.OfDouble ELEMENT_LAYOUT = ValueLayout.JAVA_DOUBLE.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
// Per-lane load callback for memory segments addressed with a long offset.
// ldLongOp invokes apply(memory, offset, i) once per lane i and stores the
// returned double as that lane's value.
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
double apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
DoubleVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
double[] res = new double[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
DoubleVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Double> m,
|
||||
FLdLongOp f) {
|
||||
//double[] vec = vec();
|
||||
double[] res = new double[length()];
|
||||
boolean[] mbits = ((AbstractMask<Double>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static double memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * 8L);
|
||||
}
|
||||
|
||||
// Per-lane callback over a memory container of type M addressed with an
// int offset: receives the container, the base offset, the lane index i
// and the lane value a. Presumably used by the stOp store helpers
// (not visible in this chunk) -- confirm against callers.
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, double a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
}
|
||||
}
|
||||
|
||||
// Per-lane store callback for memory segments addressed with a long offset.
// stLongOp invokes apply(memory, offset, i, a) once per (selected) lane i,
// passing that lane's value as a.
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, double a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
double[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Double> m,
|
||||
FStLongOp f) {
|
||||
double[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Double>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, double e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * 8L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -420,6 +495,36 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
return Double.longBitsToDouble((long)bits);
|
||||
}
|
||||
|
||||
static DoubleVector expandHelper(Vector<Double> v, VectorMask<Double> m) {
|
||||
VectorSpecies<Double> vsp = m.vectorSpecies();
|
||||
DoubleVector r = (DoubleVector) vsp.zero();
|
||||
DoubleVector vi = (DoubleVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static DoubleVector compressHelper(Vector<Double> v, VectorMask<Double> m) {
|
||||
VectorSpecies<Double> vsp = m.vectorSpecies();
|
||||
DoubleVector r = (DoubleVector) vsp.zero();
|
||||
DoubleVector vi = (DoubleVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -1594,6 +1699,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// sqrt
|
||||
/**
|
||||
* Computes the square root of this vector.
|
||||
@ -2241,6 +2347,45 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
DoubleVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
DoubleVector compress(VectorMask<Double> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Double>>
|
||||
DoubleVector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (DoubleVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
double.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
DoubleVector expand(VectorMask<Double> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Double>>
|
||||
DoubleVector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (DoubleVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
double.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2609,90 +2754,6 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
return toArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
DoubleVector fromByteArray(VectorSpecies<Double> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
DoubleSpecies vsp = (DoubleSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code double} (positive zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
DoubleVector fromByteArray(VectorSpecies<Double> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
DoubleSpecies vsp = (DoubleSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Double>)m,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code double[]}
|
||||
* starting at an offset.
|
||||
@ -2883,44 +2944,49 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*8 < 0}
|
||||
* or {@code offset+N*8 >= bb.limit()}
|
||||
* or {@code offset+N*8 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
DoubleVector fromByteBuffer(VectorSpecies<Double> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
DoubleVector fromMemorySegment(VectorSpecies<Double> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
DoubleSpecies vsp = (DoubleSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code double} (positive zero).
|
||||
@ -2931,13 +2997,11 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* DoubleBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asDoubleBuffer();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* double[] ar = new double[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_DOUBLE.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* DoubleVector r = DoubleVector.fromArray(species, ar, 0);
|
||||
@ -2951,33 +3015,36 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
* the bytes of lane values.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*8 < 0}
|
||||
* or {@code offset+N*8 >= bb.limit()}
|
||||
* or {@code offset+N*8 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
DoubleVector fromByteBuffer(VectorSpecies<Double> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
DoubleVector fromMemorySegment(VectorSpecies<Double> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
DoubleSpecies vsp = (DoubleSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Double>)m,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, DoubleVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3007,7 +3074,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3167,67 +3234,40 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Double> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
DoubleSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3261,7 +3301,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3278,7 +3318,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3336,74 +3376,33 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
abstract
|
||||
DoubleVector fromByteArray0(byte[] a, int offset);
|
||||
DoubleVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
DoubleVector fromByteArray0Template(byte[] a, int offset) {
|
||||
DoubleVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
DoubleVector fromByteArray0(byte[] a, int offset, VectorMask<Double> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Double>>
|
||||
DoubleVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
DoubleVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, DoubleVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m);
|
||||
DoubleVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Double> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Double>>
|
||||
DoubleVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
DoubleVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getDouble(o + i * 8));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, DoubleVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3422,7 +3421,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3439,7 +3438,7 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3497,71 +3496,33 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
}
|
||||
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.putDouble(o + i * 8, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Double> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Double>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.putDouble(o + i * 8, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.putDouble(o + i * 8, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, DoubleVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Double> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Double> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Double>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
DoubleSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.putDouble(o + i * 8, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, DoubleVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3578,6 +3539,16 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
DoubleSpecies vsp,
|
||||
VectorMask<Double> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Double>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
DoubleSpecies vsp,
|
||||
@ -3888,6 +3859,21 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
DoubleVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
DoubleVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Double> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -3902,6 +3888,20 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Double> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -4015,3 +4015,4 @@ public abstract class DoubleVector extends AbstractVector<Double> {
|
||||
public static final VectorSpecies<Double> SPECIES_PREFERRED
|
||||
= (DoubleSpecies) VectorSpecies.ofPreferred(double.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Float128Vector extends FloatVector {
|
||||
(Float128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float128Vector compress(VectorMask<Float> m) {
|
||||
return (Float128Vector)
|
||||
super.compressTemplate(Float128Mask.class,
|
||||
(Float128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float128Vector expand(VectorMask<Float> m) {
|
||||
return (Float128Vector)
|
||||
super.expandTemplate(Float128Mask.class,
|
||||
(Float128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float128Vector selectFrom(Vector<Float> v) {
|
||||
@ -642,6 +658,15 @@ final class Float128Vector extends FloatVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float128Mask compress() {
|
||||
return (Float128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Float128Vector.class, Float128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -834,29 +859,15 @@ final class Float128Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
return super.fromMemorySegment0Template(Float128Mask.class, ms, offset, (Float128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -884,22 +895,8 @@ final class Float128Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
super.intoByteArray0Template(Float128Mask.class, a, offset, (Float128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
super.intoByteBuffer0Template(Float128Mask.class, bb, offset, (Float128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
super.intoMemorySegment0Template(Float128Mask.class, ms, offset, (Float128Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -908,3 +905,4 @@ final class Float128Vector extends FloatVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Float256Vector extends FloatVector {
|
||||
(Float256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float256Vector compress(VectorMask<Float> m) {
|
||||
return (Float256Vector)
|
||||
super.compressTemplate(Float256Mask.class,
|
||||
(Float256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float256Vector expand(VectorMask<Float> m) {
|
||||
return (Float256Vector)
|
||||
super.expandTemplate(Float256Mask.class,
|
||||
(Float256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float256Vector selectFrom(Vector<Float> v) {
|
||||
@ -650,6 +666,15 @@ final class Float256Vector extends FloatVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float256Mask compress() {
|
||||
return (Float256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Float256Vector.class, Float256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -842,29 +867,15 @@ final class Float256Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
return super.fromMemorySegment0Template(Float256Mask.class, ms, offset, (Float256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -892,22 +903,8 @@ final class Float256Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
super.intoByteArray0Template(Float256Mask.class, a, offset, (Float256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
super.intoByteBuffer0Template(Float256Mask.class, bb, offset, (Float256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
super.intoMemorySegment0Template(Float256Mask.class, ms, offset, (Float256Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -916,3 +913,4 @@ final class Float256Vector extends FloatVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Float512Vector extends FloatVector {
|
||||
(Float512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float512Vector compress(VectorMask<Float> m) {
|
||||
return (Float512Vector)
|
||||
super.compressTemplate(Float512Mask.class,
|
||||
(Float512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float512Vector expand(VectorMask<Float> m) {
|
||||
return (Float512Vector)
|
||||
super.expandTemplate(Float512Mask.class,
|
||||
(Float512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float512Vector selectFrom(Vector<Float> v) {
|
||||
@ -666,6 +682,15 @@ final class Float512Vector extends FloatVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float512Mask compress() {
|
||||
return (Float512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Float512Vector.class, Float512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -858,29 +883,15 @@ final class Float512Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
return super.fromMemorySegment0Template(Float512Mask.class, ms, offset, (Float512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -908,22 +919,8 @@ final class Float512Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
super.intoByteArray0Template(Float512Mask.class, a, offset, (Float512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
super.intoByteBuffer0Template(Float512Mask.class, bb, offset, (Float512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
super.intoMemorySegment0Template(Float512Mask.class, ms, offset, (Float512Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -932,3 +929,4 @@ final class Float512Vector extends FloatVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class Float64Vector extends FloatVector {
|
||||
(Float64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float64Vector compress(VectorMask<Float> m) {
|
||||
return (Float64Vector)
|
||||
super.compressTemplate(Float64Mask.class,
|
||||
(Float64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float64Vector expand(VectorMask<Float> m) {
|
||||
return (Float64Vector)
|
||||
super.expandTemplate(Float64Mask.class,
|
||||
(Float64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float64Vector selectFrom(Vector<Float> v) {
|
||||
@ -638,6 +654,15 @@ final class Float64Vector extends FloatVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Float64Mask compress() {
|
||||
return (Float64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Float64Vector.class, Float64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -830,29 +855,15 @@ final class Float64Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
return super.fromMemorySegment0Template(Float64Mask.class, ms, offset, (Float64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -880,22 +891,8 @@ final class Float64Vector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
super.intoByteArray0Template(Float64Mask.class, a, offset, (Float64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
super.intoByteBuffer0Template(Float64Mask.class, bb, offset, (Float64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
super.intoMemorySegment0Template(Float64Mask.class, ms, offset, (Float64Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -904,3 +901,4 @@ final class Float64Vector extends FloatVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -461,6 +461,22 @@ final class FloatMaxVector extends FloatVector {
|
||||
(FloatMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
/**
 * Shape-specific override of {@code compress}: forwards to
 * {@code compressTemplate}, passing the concrete mask class and the mask
 * cast to {@code FloatMaxMask}, and narrows the result to
 * {@code FloatMaxVector}.
 */
@Override
|
||||
@ForceInline
|
||||
public FloatMaxVector compress(VectorMask<Float> m) {
|
||||
return (FloatMaxVector)
|
||||
super.compressTemplate(FloatMaxMask.class,
|
||||
(FloatMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
/**
 * Shape-specific override of {@code expand}: forwards to
 * {@code expandTemplate}, passing the concrete mask class and the mask
 * cast to {@code FloatMaxMask}, and narrows the result to
 * {@code FloatMaxVector}.
 */
@Override
|
||||
@ForceInline
|
||||
public FloatMaxVector expand(VectorMask<Float> m) {
|
||||
return (FloatMaxVector)
|
||||
super.expandTemplate(FloatMaxMask.class,
|
||||
(FloatMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public FloatMaxVector selectFrom(Vector<Float> v) {
|
||||
@ -635,6 +651,15 @@ final class FloatMaxVector extends FloatVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
/**
 * Compresses this mask by calling {@code VectorSupport.compressExpandOp}
 * with {@code VECTOR_OP_MASK_COMPRESS}.
 * The fallback lambda compares the species' iota vector against
 * {@code m1.trueCount()} using {@code LT}, so a result lane is set when its
 * iota value is below the number of set lanes in the input mask.
 */
@Override
|
||||
@ForceInline
|
||||
public FloatMaxMask compress() {
|
||||
return (FloatMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
// No vector operand is supplied (null); this mask is the only input.
FloatMaxVector.class, FloatMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -827,29 +852,15 @@ final class FloatMaxVector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
return super.fromByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m); // specialize
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
return super.fromMemorySegment0Template(FloatMaxMask.class, ms, offset, (FloatMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -877,22 +888,8 @@ final class FloatMaxVector extends FloatVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m) {
|
||||
super.intoByteArray0Template(FloatMaxMask.class, a, offset, (FloatMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m) {
|
||||
super.intoByteBuffer0Template(FloatMaxMask.class, bb, offset, (FloatMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m) {
|
||||
super.intoMemorySegment0Template(FloatMaxMask.class, ms, offset, (FloatMaxMask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -901,3 +898,4 @@ final class FloatMaxVector extends FloatVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_NOFP;
|
||||
|
||||
static final ValueLayout.OfFloat ELEMENT_LAYOUT = ValueLayout.JAVA_FLOAT.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/**
 * Callback that produces the {@code float} for lane {@code i}, given a
 * memory segment and a {@code long} base offset. Used by the
 * {@code ldLongOp} fallbacks, which invoke it once per loaded lane.
 */
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
float apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/**
 * Scalar fallback load: builds a new vector by calling {@code f} once per
 * lane with the same segment and base offset and the lane index.
 *
 * @param memory the memory segment handed to {@code f}
 * @param offset the base offset handed to {@code f} unchanged for every lane
 * @param f per-lane loader
 * @return a vector created by {@code vectorFactory} from the loaded lanes
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
FloatVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
// The receiver's own lane values are not read; only length() is used.
//dummy; no vec = vec();
|
||||
float[] res = new float[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/**
 * Masked scalar fallback load: calls {@code f} only for lanes whose mask
 * bit is set. Lanes with a clear bit are never passed to {@code f} and keep
 * the default {@code 0.0f} of the freshly allocated result array.
 *
 * @param memory the memory segment handed to {@code f}
 * @param offset the base offset handed to {@code f} unchanged for every lane
 * @param m mask selecting the lanes to load; cast to {@code AbstractMask}
 * @param f per-lane loader
 * @return a vector created by {@code vectorFactory} from the loaded lanes
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
FloatVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Float> m,
|
||||
FLdLongOp f) {
|
||||
//float[] vec = vec();
|
||||
float[] res = new float[length()];
|
||||
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
// Skipping unset lanes means f is never asked to read them.
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/**
 * Reads the float for lane {@code i} from {@code ms} at byte offset
 * {@code o + i * 4}, using {@code ELEMENT_LAYOUT}. Its signature matches
 * {@code FLdLongOp.apply}.
 */
static float memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
// The 4L literal forces the lane displacement to be computed in long arithmetic.
return ms.get(ELEMENT_LAYOUT, o + i * 4L);
|
||||
}
|
||||
|
||||
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, float a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Callback that consumes the value {@code a} of lane {@code i}, given a
 * memory segment and a {@code long} base offset. Used by the
 * {@code stLongOp} fallbacks, which invoke it once per stored lane.
 */
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, float a);
|
||||
}
|
||||
|
||||
/**
 * Scalar fallback store: calls {@code f} once per lane, in lane order, with
 * the same segment and base offset, the lane index, and the lane's value.
 *
 * @param memory the memory segment handed to {@code f}
 * @param offset the base offset handed to {@code f} unchanged for every lane
 * @param f per-lane storer
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
float[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Masked scalar fallback store: calls {@code f} only for lanes whose mask
 * bit is set, in lane order. Lanes with a clear bit are never passed to
 * {@code f}.
 *
 * @param memory the memory segment handed to {@code f}
 * @param offset the base offset handed to {@code f} unchanged for every lane
 * @param m mask selecting the lanes to store; cast to {@code AbstractMask}
 * @param f per-lane storer
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Float> m,
|
||||
FStLongOp f) {
|
||||
float[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Float>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Writes {@code e} as the float for lane {@code i} into {@code ms} at byte
 * offset {@code o + i * 4}, using {@code ELEMENT_LAYOUT}. Its signature
 * matches {@code FStLongOp.apply}.
 */
static void memorySegmentSet(MemorySegment ms, long o, int i, float e) {
|
||||
// The 4L literal forces the lane displacement to be computed in long arithmetic.
ms.set(ELEMENT_LAYOUT, o + i * 4L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -420,6 +495,36 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
return Float.intBitsToFloat((int)bits);
|
||||
}
|
||||
|
||||
/**
 * Lane-by-lane fallback for expand. Starting from the species' zero vector,
 * it walks the lanes in order; each lane whose mask bit is set receives the
 * next not-yet-consumed lane of {@code v}, counting from lane 0. Lanes whose
 * mask bit is clear stay zero.
 *
 * @param v source vector; cast to {@code FloatVector}
 * @param m mask selecting the destination lanes; also supplies the species
 * @return {@code v} itself when every mask lane is set, otherwise the expanded vector
 */
static FloatVector expandHelper(Vector<Float> v, VectorMask<Float> m) {
|
||||
VectorSpecies<Float> vsp = m.vectorSpecies();
|
||||
FloatVector r = (FloatVector) vsp.zero();
|
||||
FloatVector vi = (FloatVector) v;
|
||||
// With every lane selected, source and destination indices coincide.
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
// i indexes destination lanes; j indexes the next source lane to consume.
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * Lane-by-lane fallback for compress. Starting from the species' zero
 * vector, it walks the lanes in order; each lane of {@code v} whose mask bit
 * is set is written to the next free result lane, counting from lane 0.
 * Result lanes beyond the number of selected lanes stay zero.
 *
 * @param v source vector; cast to {@code FloatVector}
 * @param m mask selecting the source lanes; also supplies the species
 * @return {@code v} itself when every mask lane is set, otherwise the compressed vector
 */
static FloatVector compressHelper(Vector<Float> v, VectorMask<Float> m) {
|
||||
VectorSpecies<Float> vsp = m.vectorSpecies();
|
||||
FloatVector r = (FloatVector) vsp.zero();
|
||||
FloatVector vi = (FloatVector) v;
|
||||
// With every lane selected, source and destination indices coincide.
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
// i indexes source lanes; j indexes the next destination lane to fill.
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -1602,6 +1707,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
}
|
||||
|
||||
|
||||
|
||||
// sqrt
|
||||
/**
|
||||
* Computes the square root of this vector.
|
||||
@ -2253,6 +2359,45 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
FloatVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
FloatVector compress(VectorMask<Float> m);
|
||||
|
||||
/**
 * Shared implementation behind the shape-specific {@code compress}
 * overrides. Calls {@code m.check(masktype, this)} first, then hands the
 * operation to {@code VectorSupport.compressExpandOp} with
 * {@code VECTOR_OP_COMPRESS}, supplying {@code compressHelper} as the
 * fallback lambda.
 *
 * @param masktype the concrete mask class passed by the caller
 * @param m the mask controlling which lanes are compressed
 * @return the result of the compress operation, cast to {@code FloatVector}
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Float>>
|
||||
FloatVector compressTemplate(Class<M> masktype, M m) {
|
||||
// NOTE(review): presumably validates that m is compatible with this vector — confirm in AbstractMask.check.
m.check(masktype, this);
|
||||
return (FloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
float.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
FloatVector expand(VectorMask<Float> m);
|
||||
|
||||
/**
 * Shared implementation behind the shape-specific {@code expand}
 * overrides. Calls {@code m.check(masktype, this)} first, then hands the
 * operation to {@code VectorSupport.compressExpandOp} with
 * {@code VECTOR_OP_EXPAND}, supplying {@code expandHelper} as the
 * fallback lambda.
 *
 * @param masktype the concrete mask class passed by the caller
 * @param m the mask controlling which lanes receive expanded values
 * @return the result of the expand operation, cast to {@code FloatVector}
 */
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Float>>
|
||||
FloatVector expandTemplate(Class<M> masktype, M m) {
|
||||
// NOTE(review): presumably validates that m is compatible with this vector — confirm in AbstractMask.check.
m.check(masktype, this);
|
||||
return (FloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
float.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2633,90 +2778,6 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
FloatVector fromByteArray(VectorSpecies<Float> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
FloatSpecies vsp = (FloatSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code float} (positive zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
FloatVector fromByteArray(VectorSpecies<Float> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
FloatSpecies vsp = (FloatSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code float[]}
|
||||
* starting at an offset.
|
||||
@ -2889,44 +2950,49 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*4 < 0}
|
||||
* or {@code offset+N*4 >= bb.limit()}
|
||||
* or {@code offset+N*4 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
FloatVector fromByteBuffer(VectorSpecies<Float> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
FloatVector fromMemorySegment(VectorSpecies<Float> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
FloatSpecies vsp = (FloatSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code float} (positive zero).
|
||||
@ -2937,13 +3003,11 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* FloatBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asFloatBuffer();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* float[] ar = new float[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_FLOAT.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* FloatVector r = FloatVector.fromArray(species, ar, 0);
|
||||
@ -2957,33 +3021,36 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
* the bytes of lane values.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*4 < 0}
|
||||
* or {@code offset+N*4 >= bb.limit()}
|
||||
* or {@code offset+N*4 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
FloatVector fromByteBuffer(VectorSpecies<Float> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
FloatVector fromMemorySegment(VectorSpecies<Float> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
FloatSpecies vsp = (FloatSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Float>)m,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, FloatVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3013,7 +3080,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3154,67 +3221,40 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
FloatSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Float> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
FloatSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3248,7 +3288,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3265,7 +3305,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3305,74 +3345,33 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
abstract
|
||||
FloatVector fromByteArray0(byte[] a, int offset);
|
||||
FloatVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
FloatVector fromByteArray0Template(byte[] a, int offset) {
|
||||
FloatVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
FloatVector fromByteArray0(byte[] a, int offset, VectorMask<Float> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Float>>
|
||||
FloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, FloatVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
FloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
|
||||
FloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Float>>
|
||||
FloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
FloatVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getFloat(o + i * 4));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, FloatVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3391,7 +3390,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3408,7 +3407,7 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3447,71 +3446,33 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
}
|
||||
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Float> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Float>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.putFloat(o + i * 4, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, FloatVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Float> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Float> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Float>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
FloatSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.putFloat(o + i * 4, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, FloatVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3528,6 +3489,16 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
/**
 * Overload of the masked bounds check that takes a {@code long} offset and
 * limit. It casts {@code m} to {@code AbstractMask} and delegates to
 * {@code checkIndexByLane}, passing the offset, the limit, the species'
 * iota vector and the scale.
 *
 * @param offset base offset being accessed
 * @param vsp species supplying the iota vector
 * @param m mask forwarded to the per-lane check
 * @param scale multiplier forwarded to {@code checkIndexByLane}; the visible callers pass 4
 * @param limit upper bound forwarded to {@code checkIndexByLane}
 */
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
FloatSpecies vsp,
|
||||
VectorMask<Float> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Float>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
FloatSpecies vsp,
|
||||
@ -3838,6 +3809,21 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
FloatVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
FloatVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Float> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -3852,6 +3838,20 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Float> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -3965,3 +3965,4 @@ public abstract class FloatVector extends AbstractVector<Float> {
|
||||
public static final VectorSpecies<Float> SPECIES_PREFERRED
|
||||
= (FloatSpecies) VectorSpecies.ofPreferred(float.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Int128Vector extends IntVector {
|
||||
(Int128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int128Vector compress(VectorMask<Integer> m) {
|
||||
return (Int128Vector)
|
||||
super.compressTemplate(Int128Mask.class,
|
||||
(Int128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int128Vector expand(VectorMask<Integer> m) {
|
||||
return (Int128Vector)
|
||||
super.expandTemplate(Int128Mask.class,
|
||||
(Int128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int128Vector selectFrom(Vector<Integer> v) {
|
||||
@ -653,6 +669,15 @@ final class Int128Vector extends IntVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int128Mask compress() {
|
||||
return (Int128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Int128Vector.class, Int128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -845,29 +870,15 @@ final class Int128Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
return super.fromMemorySegment0Template(Int128Mask.class, ms, offset, (Int128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -895,22 +906,8 @@ final class Int128Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteArray0Template(Int128Mask.class, a, offset, (Int128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteBuffer0Template(Int128Mask.class, bb, offset, (Int128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
super.intoMemorySegment0Template(Int128Mask.class, ms, offset, (Int128Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -919,3 +916,4 @@ final class Int128Vector extends IntVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Int256Vector extends IntVector {
|
||||
(Int256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int256Vector compress(VectorMask<Integer> m) {
|
||||
return (Int256Vector)
|
||||
super.compressTemplate(Int256Mask.class,
|
||||
(Int256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int256Vector expand(VectorMask<Integer> m) {
|
||||
return (Int256Vector)
|
||||
super.expandTemplate(Int256Mask.class,
|
||||
(Int256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int256Vector selectFrom(Vector<Integer> v) {
|
||||
@ -661,6 +677,15 @@ final class Int256Vector extends IntVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int256Mask compress() {
|
||||
return (Int256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Int256Vector.class, Int256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -853,29 +878,15 @@ final class Int256Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
return super.fromMemorySegment0Template(Int256Mask.class, ms, offset, (Int256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -903,22 +914,8 @@ final class Int256Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteArray0Template(Int256Mask.class, a, offset, (Int256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteBuffer0Template(Int256Mask.class, bb, offset, (Int256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
super.intoMemorySegment0Template(Int256Mask.class, ms, offset, (Int256Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -927,3 +924,4 @@ final class Int256Vector extends IntVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Int512Vector extends IntVector {
|
||||
(Int512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int512Vector compress(VectorMask<Integer> m) {
|
||||
return (Int512Vector)
|
||||
super.compressTemplate(Int512Mask.class,
|
||||
(Int512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int512Vector expand(VectorMask<Integer> m) {
|
||||
return (Int512Vector)
|
||||
super.expandTemplate(Int512Mask.class,
|
||||
(Int512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int512Vector selectFrom(Vector<Integer> v) {
|
||||
@ -677,6 +693,15 @@ final class Int512Vector extends IntVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int512Mask compress() {
|
||||
return (Int512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Int512Vector.class, Int512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -869,29 +894,15 @@ final class Int512Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
return super.fromMemorySegment0Template(Int512Mask.class, ms, offset, (Int512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -919,22 +930,8 @@ final class Int512Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteArray0Template(Int512Mask.class, a, offset, (Int512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteBuffer0Template(Int512Mask.class, bb, offset, (Int512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
super.intoMemorySegment0Template(Int512Mask.class, ms, offset, (Int512Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -943,3 +940,4 @@ final class Int512Vector extends IntVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Int64Vector extends IntVector {
|
||||
(Int64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int64Vector compress(VectorMask<Integer> m) {
|
||||
return (Int64Vector)
|
||||
super.compressTemplate(Int64Mask.class,
|
||||
(Int64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int64Vector expand(VectorMask<Integer> m) {
|
||||
return (Int64Vector)
|
||||
super.expandTemplate(Int64Mask.class,
|
||||
(Int64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int64Vector selectFrom(Vector<Integer> v) {
|
||||
@ -649,6 +665,15 @@ final class Int64Vector extends IntVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Int64Mask compress() {
|
||||
return (Int64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Int64Vector.class, Int64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -841,29 +866,15 @@ final class Int64Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
return super.fromMemorySegment0Template(Int64Mask.class, ms, offset, (Int64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -891,22 +902,8 @@ final class Int64Vector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteArray0Template(Int64Mask.class, a, offset, (Int64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteBuffer0Template(Int64Mask.class, bb, offset, (Int64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
super.intoMemorySegment0Template(Int64Mask.class, ms, offset, (Int64Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -915,3 +912,4 @@ final class Int64Vector extends IntVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class IntMaxVector extends IntVector {
|
||||
(IntMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public IntMaxVector compress(VectorMask<Integer> m) {
|
||||
return (IntMaxVector)
|
||||
super.compressTemplate(IntMaxMask.class,
|
||||
(IntMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public IntMaxVector expand(VectorMask<Integer> m) {
|
||||
return (IntMaxVector)
|
||||
super.expandTemplate(IntMaxMask.class,
|
||||
(IntMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public IntMaxVector selectFrom(Vector<Integer> v) {
|
||||
@ -647,6 +663,15 @@ final class IntMaxVector extends IntVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public IntMaxMask compress() {
|
||||
return (IntMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
IntMaxVector.class, IntMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -850,29 +875,15 @@ final class IntMaxVector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
return super.fromByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m); // specialize
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
return super.fromMemorySegment0Template(IntMaxMask.class, ms, offset, (IntMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -900,22 +911,8 @@ final class IntMaxVector extends IntVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteArray0Template(IntMaxMask.class, a, offset, (IntMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m) {
|
||||
super.intoByteBuffer0Template(IntMaxMask.class, bb, offset, (IntMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m) {
|
||||
super.intoMemorySegment0Template(IntMaxMask.class, ms, offset, (IntMaxMask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -924,3 +921,4 @@ final class IntMaxVector extends IntVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
|
||||
|
||||
static final ValueLayout.OfInt ELEMENT_LAYOUT = ValueLayout.JAVA_INT.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
int apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
IntVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
int[] res = new int[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
IntVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Integer> m,
|
||||
FLdLongOp f) {
|
||||
//int[] vec = vec();
|
||||
int[] res = new int[length()];
|
||||
boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static int memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * 4L);
|
||||
}
|
||||
|
||||
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, int a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
}
|
||||
}
|
||||
|
||||
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, int a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
int[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Integer> m,
|
||||
FStLongOp f) {
|
||||
int[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Integer>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, int e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * 4L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -431,6 +506,36 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
return ((int)bits);
|
||||
}
|
||||
|
||||
static IntVector expandHelper(Vector<Integer> v, VectorMask<Integer> m) {
|
||||
VectorSpecies<Integer> vsp = m.vectorSpecies();
|
||||
IntVector r = (IntVector) vsp.zero();
|
||||
IntVector vi = (IntVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static IntVector compressHelper(Vector<Integer> v, VectorMask<Integer> m) {
|
||||
VectorSpecies<Integer> vsp = m.vectorSpecies();
|
||||
IntVector r = (IntVector) vsp.zero();
|
||||
IntVector vi = (IntVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -620,6 +725,16 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
v0.uOp(m, (i, a) -> (int) -a);
|
||||
case VECTOR_OP_ABS: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Math.abs(a));
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Integer.bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Integer.numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Integer.numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Integer.reverse(a));
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (int) Integer.reverseBytes(a));
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -760,6 +875,10 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
|
||||
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
|
||||
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> Integer.compress(a, n));
|
||||
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> Integer.expand(a, n));
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -1745,6 +1864,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
return lanewise(ABS);
|
||||
}
|
||||
|
||||
|
||||
// not (~)
|
||||
/**
|
||||
* Computes the bitwise logical complement ({@code ~})
|
||||
@ -2371,6 +2491,45 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
IntVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
IntVector compress(VectorMask<Integer> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Integer>>
|
||||
IntVector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (IntVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
int.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
IntVector expand(VectorMask<Integer> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Integer>>
|
||||
IntVector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (IntVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
int.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2776,90 +2935,6 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
IntVector fromByteArray(VectorSpecies<Integer> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
IntSpecies vsp = (IntSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code int} (zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
IntVector fromByteArray(VectorSpecies<Integer> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
IntSpecies vsp = (IntSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Integer>)m,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code int[]}
|
||||
* starting at an offset.
|
||||
@ -3032,44 +3107,49 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*4 < 0}
|
||||
* or {@code offset+N*4 >= bb.limit()}
|
||||
* or {@code offset+N*4 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
IntVector fromByteBuffer(VectorSpecies<Integer> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
IntVector fromMemorySegment(VectorSpecies<Integer> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
IntSpecies vsp = (IntSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code int} (zero).
|
||||
@ -3080,13 +3160,11 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* IntBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asIntBuffer();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* int[] ar = new int[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_INT.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* IntVector r = IntVector.fromArray(species, ar, 0);
|
||||
@ -3100,33 +3178,36 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
* the bytes of lane values.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*4 < 0}
|
||||
* or {@code offset+N*4 >= bb.limit()}
|
||||
* or {@code offset+N*4 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
IntVector fromByteBuffer(VectorSpecies<Integer> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
IntVector fromMemorySegment(VectorSpecies<Integer> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
IntSpecies vsp = (IntSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Integer>)m,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, IntVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3156,7 +3237,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3297,67 +3378,40 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
IntSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Integer> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
IntSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 4, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3391,7 +3445,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3408,7 +3462,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3448,74 +3502,33 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
abstract
|
||||
IntVector fromByteArray0(byte[] a, int offset);
|
||||
IntVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
IntVector fromByteArray0Template(byte[] a, int offset) {
|
||||
IntVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
IntSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
IntVector fromByteArray0(byte[] a, int offset, VectorMask<Integer> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Integer>>
|
||||
IntVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
IntSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
IntVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
IntSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, IntVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
IntVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
|
||||
IntVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Integer> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Integer>>
|
||||
IntVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
IntVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
IntSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getInt(o + i * 4));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, IntVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3534,7 +3547,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3551,7 +3564,7 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3590,71 +3603,33 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
}
|
||||
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
IntSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Integer> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Integer>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
IntSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.putInt(o + i * 4, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
IntSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, IntVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Integer> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Integer> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Integer>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
IntSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.putInt(o + i * 4, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, IntVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3671,6 +3646,16 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
IntSpecies vsp,
|
||||
VectorMask<Integer> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Integer>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
IntSpecies vsp,
|
||||
@ -3981,6 +3966,21 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
IntVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
IntVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Integer> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -3995,6 +3995,20 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Integer> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -4108,3 +4122,4 @@ public abstract class IntVector extends AbstractVector<Integer> {
|
||||
public static final VectorSpecies<Integer> SPECIES_PREFERRED
|
||||
= (IntSpecies) VectorSpecies.ofPreferred(int.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -464,6 +464,22 @@ final class Long128Vector extends LongVector {
|
||||
(Long128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long128Vector compress(VectorMask<Long> m) {
|
||||
return (Long128Vector)
|
||||
super.compressTemplate(Long128Mask.class,
|
||||
(Long128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long128Vector expand(VectorMask<Long> m) {
|
||||
return (Long128Vector)
|
||||
super.expandTemplate(Long128Mask.class,
|
||||
(Long128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long128Vector selectFrom(Vector<Long> v) {
|
||||
@ -639,6 +655,15 @@ final class Long128Vector extends LongVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long128Mask compress() {
|
||||
return (Long128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Long128Vector.class, Long128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -831,29 +856,15 @@ final class Long128Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
return super.fromMemorySegment0Template(Long128Mask.class, ms, offset, (Long128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -881,22 +892,8 @@ final class Long128Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
super.intoByteArray0Template(Long128Mask.class, a, offset, (Long128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
super.intoByteBuffer0Template(Long128Mask.class, bb, offset, (Long128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
super.intoMemorySegment0Template(Long128Mask.class, ms, offset, (Long128Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -905,3 +902,4 @@ final class Long128Vector extends LongVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -464,6 +464,22 @@ final class Long256Vector extends LongVector {
|
||||
(Long256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long256Vector compress(VectorMask<Long> m) {
|
||||
return (Long256Vector)
|
||||
super.compressTemplate(Long256Mask.class,
|
||||
(Long256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long256Vector expand(VectorMask<Long> m) {
|
||||
return (Long256Vector)
|
||||
super.expandTemplate(Long256Mask.class,
|
||||
(Long256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long256Vector selectFrom(Vector<Long> v) {
|
||||
@ -643,6 +659,15 @@ final class Long256Vector extends LongVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long256Mask compress() {
|
||||
return (Long256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Long256Vector.class, Long256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -835,29 +860,15 @@ final class Long256Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
return super.fromMemorySegment0Template(Long256Mask.class, ms, offset, (Long256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -885,22 +896,8 @@ final class Long256Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
super.intoByteArray0Template(Long256Mask.class, a, offset, (Long256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
super.intoByteBuffer0Template(Long256Mask.class, bb, offset, (Long256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
super.intoMemorySegment0Template(Long256Mask.class, ms, offset, (Long256Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -909,3 +906,4 @@ final class Long256Vector extends LongVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -464,6 +464,22 @@ final class Long512Vector extends LongVector {
|
||||
(Long512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long512Vector compress(VectorMask<Long> m) {
|
||||
return (Long512Vector)
|
||||
super.compressTemplate(Long512Mask.class,
|
||||
(Long512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long512Vector expand(VectorMask<Long> m) {
|
||||
return (Long512Vector)
|
||||
super.expandTemplate(Long512Mask.class,
|
||||
(Long512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long512Vector selectFrom(Vector<Long> v) {
|
||||
@ -651,6 +667,15 @@ final class Long512Vector extends LongVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long512Mask compress() {
|
||||
return (Long512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Long512Vector.class, Long512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -843,29 +868,15 @@ final class Long512Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
return super.fromMemorySegment0Template(Long512Mask.class, ms, offset, (Long512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -893,22 +904,8 @@ final class Long512Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
super.intoByteArray0Template(Long512Mask.class, a, offset, (Long512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
super.intoByteBuffer0Template(Long512Mask.class, bb, offset, (Long512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
super.intoMemorySegment0Template(Long512Mask.class, ms, offset, (Long512Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -917,3 +914,4 @@ final class Long512Vector extends LongVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -464,6 +464,22 @@ final class Long64Vector extends LongVector {
|
||||
(Long64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long64Vector compress(VectorMask<Long> m) {
|
||||
return (Long64Vector)
|
||||
super.compressTemplate(Long64Mask.class,
|
||||
(Long64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long64Vector expand(VectorMask<Long> m) {
|
||||
return (Long64Vector)
|
||||
super.expandTemplate(Long64Mask.class,
|
||||
(Long64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long64Vector selectFrom(Vector<Long> v) {
|
||||
@ -637,6 +653,15 @@ final class Long64Vector extends LongVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Long64Mask compress() {
|
||||
return (Long64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Long64Vector.class, Long64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -829,29 +854,15 @@ final class Long64Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
return super.fromMemorySegment0Template(Long64Mask.class, ms, offset, (Long64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -879,22 +890,8 @@ final class Long64Vector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
super.intoByteArray0Template(Long64Mask.class, a, offset, (Long64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
super.intoByteBuffer0Template(Long64Mask.class, bb, offset, (Long64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
super.intoMemorySegment0Template(Long64Mask.class, ms, offset, (Long64Mask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -903,3 +900,4 @@ final class Long64Vector extends LongVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -464,6 +464,22 @@ final class LongMaxVector extends LongVector {
|
||||
(LongMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public LongMaxVector compress(VectorMask<Long> m) {
|
||||
return (LongMaxVector)
|
||||
super.compressTemplate(LongMaxMask.class,
|
||||
(LongMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public LongMaxVector expand(VectorMask<Long> m) {
|
||||
return (LongMaxVector)
|
||||
super.expandTemplate(LongMaxMask.class,
|
||||
(LongMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public LongMaxVector selectFrom(Vector<Long> v) {
|
||||
@ -637,6 +653,15 @@ final class LongMaxVector extends LongVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public LongMaxMask compress() {
|
||||
return (LongMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
LongMaxVector.class, LongMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -829,29 +854,15 @@ final class LongMaxVector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
return super.fromByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m); // specialize
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
return super.fromMemorySegment0Template(LongMaxMask.class, ms, offset, (LongMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -879,22 +890,8 @@ final class LongMaxVector extends LongVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m) {
|
||||
super.intoByteArray0Template(LongMaxMask.class, a, offset, (LongMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m) {
|
||||
super.intoByteBuffer0Template(LongMaxMask.class, bb, offset, (LongMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m) {
|
||||
super.intoMemorySegment0Template(LongMaxMask.class, ms, offset, (LongMaxMask) m);
|
||||
}
|
||||
|
||||
|
||||
@ -903,3 +900,4 @@ final class LongMaxVector extends LongVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
|
||||
|
||||
// Value layout passed to MemorySegment get/set when individual long lanes are
// accessed: JAVA_LONG with its bit alignment set to 8.
static final ValueLayout.OfLong ELEMENT_LAYOUT = ValueLayout.JAVA_LONG.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
long apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
LongVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
long[] res = new long[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
LongVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Long> m,
|
||||
FLdLongOp f) {
|
||||
//long[] vec = vec();
|
||||
long[] res = new long[length()];
|
||||
boolean[] mbits = ((AbstractMask<Long>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static long memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * 8L);
|
||||
}
|
||||
|
||||
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, long a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
}
|
||||
}
|
||||
|
||||
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, long a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
long[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Long> m,
|
||||
FStLongOp f) {
|
||||
long[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Long>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, long e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * 8L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -431,6 +506,36 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
return ((long)bits);
|
||||
}
|
||||
|
||||
static LongVector expandHelper(Vector<Long> v, VectorMask<Long> m) {
|
||||
VectorSpecies<Long> vsp = m.vectorSpecies();
|
||||
LongVector r = (LongVector) vsp.zero();
|
||||
LongVector vi = (LongVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static LongVector compressHelper(Vector<Long> v, VectorMask<Long> m) {
|
||||
VectorSpecies<Long> vsp = m.vectorSpecies();
|
||||
LongVector r = (LongVector) vsp.zero();
|
||||
LongVector vi = (LongVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -578,6 +683,16 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
v0.uOp(m, (i, a) -> (long) -a);
|
||||
case VECTOR_OP_ABS: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Math.abs(a));
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Long.bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Long.numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Long.numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Long.reverse(a));
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (long) Long.reverseBytes(a));
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -718,6 +833,10 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
|
||||
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
|
||||
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> Long.compress(a, n));
|
||||
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> Long.expand(a, n));
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -1658,6 +1777,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
return lanewise(ABS);
|
||||
}
|
||||
|
||||
|
||||
// not (~)
|
||||
/**
|
||||
* Computes the bitwise logical complement ({@code ~})
|
||||
@ -2237,6 +2357,45 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
LongVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
LongVector compress(VectorMask<Long> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Long>>
|
||||
LongVector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (LongVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
long.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
LongVector expand(VectorMask<Long> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Long>>
|
||||
LongVector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (LongVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
long.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2637,90 +2796,6 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
LongVector fromByteArray(VectorSpecies<Long> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
LongSpecies vsp = (LongSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code long} (zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
LongVector fromByteArray(VectorSpecies<Long> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
LongSpecies vsp = (LongSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Long>)m,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code long[]}
|
||||
* starting at an offset.
|
||||
@ -2911,44 +2986,49 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*8 < 0}
|
||||
* or {@code offset+N*8 >= bb.limit()}
|
||||
* or {@code offset+N*8 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
LongVector fromByteBuffer(VectorSpecies<Long> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
LongVector fromMemorySegment(VectorSpecies<Long> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
LongSpecies vsp = (LongSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code long} (zero).
|
||||
@ -2959,13 +3039,11 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* LongBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asLongBuffer();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* long[] ar = new long[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_LONG.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* LongVector r = LongVector.fromArray(species, ar, 0);
|
||||
@ -2979,33 +3057,36 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
* the bytes of lane values.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*8 < 0}
|
||||
* or {@code offset+N*8 >= bb.limit()}
|
||||
* or {@code offset+N*8 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
LongVector fromByteBuffer(VectorSpecies<Long> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
LongVector fromMemorySegment(VectorSpecies<Long> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
LongSpecies vsp = (LongSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Long>)m,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, LongVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3035,7 +3116,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3195,67 +3276,40 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
LongSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Long> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
LongSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 8, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3289,7 +3343,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3306,7 +3360,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3364,74 +3418,33 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
abstract
|
||||
LongVector fromByteArray0(byte[] a, int offset);
|
||||
LongVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
LongVector fromByteArray0Template(byte[] a, int offset) {
|
||||
LongVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
LongSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
LongVector fromByteArray0(byte[] a, int offset, VectorMask<Long> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Long>>
|
||||
LongVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
LongSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
LongVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
LongSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, LongVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
LongVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m);
|
||||
LongVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Long> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Long>>
|
||||
LongVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
LongVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
LongSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getLong(o + i * 8));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, LongVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3450,7 +3463,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3467,7 +3480,7 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3525,71 +3538,33 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
}
|
||||
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
LongSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.putLong(o + i * 8, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Long> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Long>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
LongSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.putLong(o + i * 8, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
LongSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.putLong(o + i * 8, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, LongVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Long> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Long> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Long>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
LongSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.putLong(o + i * 8, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, LongVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3606,6 +3581,16 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
LongSpecies vsp,
|
||||
VectorMask<Long> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Long>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
LongSpecies vsp,
|
||||
@ -3907,6 +3892,21 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
LongVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
LongVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Long> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -3921,6 +3921,20 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Long> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -4034,3 +4048,4 @@ public abstract class LongVector extends AbstractVector<Long> {
|
||||
public static final VectorSpecies<Long> SPECIES_PREFERRED
|
||||
= (LongSpecies) VectorSpecies.ofPreferred(long.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Short128Vector extends ShortVector {
|
||||
(Short128Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short128Vector compress(VectorMask<Short> m) {
|
||||
return (Short128Vector)
|
||||
super.compressTemplate(Short128Mask.class,
|
||||
(Short128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short128Vector expand(VectorMask<Short> m) {
|
||||
return (Short128Vector)
|
||||
super.expandTemplate(Short128Mask.class,
|
||||
(Short128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short128Vector selectFrom(Vector<Short> v) {
|
||||
@ -661,6 +677,15 @@ final class Short128Vector extends ShortVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Mask compression. Calls VectorSupport.compressExpandOp with
// VECTOR_OP_MASK_COMPRESS, passing null followed by this mask.
// The trailing lambda compares VSPECIES.iota() lane-wise (LT) against
// m1.trueCount(); presumably iota() is the 0,1,2,... index vector, so the
// result would have its lowest trueCount() lanes set -- TODO confirm.
@Override
|
||||
@ForceInline
|
||||
public Short128Mask compress() {
|
||||
return (Short128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Short128Vector.class, Short128Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -860,29 +885,15 @@ final class Short128Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
return super.fromMemorySegment0Template(Short128Mask.class, ms, offset, (Short128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -904,22 +915,8 @@ final class Short128Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
super.intoByteArray0Template(Short128Mask.class, a, offset, (Short128Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
super.intoByteBuffer0Template(Short128Mask.class, bb, offset, (Short128Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
super.intoMemorySegment0Template(Short128Mask.class, ms, offset, (Short128Mask) m);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -934,3 +931,4 @@ final class Short128Vector extends ShortVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Short256Vector extends ShortVector {
|
||||
(Short256Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short256Vector compress(VectorMask<Short> m) {
|
||||
return (Short256Vector)
|
||||
super.compressTemplate(Short256Mask.class,
|
||||
(Short256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short256Vector expand(VectorMask<Short> m) {
|
||||
return (Short256Vector)
|
||||
super.expandTemplate(Short256Mask.class,
|
||||
(Short256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short256Vector selectFrom(Vector<Short> v) {
|
||||
@ -677,6 +693,15 @@ final class Short256Vector extends ShortVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Mask compression. Calls VectorSupport.compressExpandOp with
// VECTOR_OP_MASK_COMPRESS, passing null followed by this mask.
// The trailing lambda compares VSPECIES.iota() lane-wise (LT) against
// m1.trueCount(); presumably iota() is the 0,1,2,... index vector, so the
// result would have its lowest trueCount() lanes set -- TODO confirm.
@Override
|
||||
@ForceInline
|
||||
public Short256Mask compress() {
|
||||
return (Short256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Short256Vector.class, Short256Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -876,29 +901,15 @@ final class Short256Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
return super.fromMemorySegment0Template(Short256Mask.class, ms, offset, (Short256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -920,22 +931,8 @@ final class Short256Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
super.intoByteArray0Template(Short256Mask.class, a, offset, (Short256Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
super.intoByteBuffer0Template(Short256Mask.class, bb, offset, (Short256Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
super.intoMemorySegment0Template(Short256Mask.class, ms, offset, (Short256Mask) m);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -950,3 +947,4 @@ final class Short256Vector extends ShortVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Short512Vector extends ShortVector {
|
||||
(Short512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short512Vector compress(VectorMask<Short> m) {
|
||||
return (Short512Vector)
|
||||
super.compressTemplate(Short512Mask.class,
|
||||
(Short512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short512Vector expand(VectorMask<Short> m) {
|
||||
return (Short512Vector)
|
||||
super.expandTemplate(Short512Mask.class,
|
||||
(Short512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short512Vector selectFrom(Vector<Short> v) {
|
||||
@ -709,6 +725,15 @@ final class Short512Vector extends ShortVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Mask compression. Calls VectorSupport.compressExpandOp with
// VECTOR_OP_MASK_COMPRESS, passing null followed by this mask.
// The trailing lambda compares VSPECIES.iota() lane-wise (LT) against
// m1.trueCount(); presumably iota() is the 0,1,2,... index vector, so the
// result would have its lowest trueCount() lanes set -- TODO confirm.
@Override
|
||||
@ForceInline
|
||||
public Short512Mask compress() {
|
||||
return (Short512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Short512Vector.class, Short512Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -908,29 +933,15 @@ final class Short512Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
return super.fromMemorySegment0Template(Short512Mask.class, ms, offset, (Short512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -952,22 +963,8 @@ final class Short512Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
super.intoByteArray0Template(Short512Mask.class, a, offset, (Short512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
super.intoByteBuffer0Template(Short512Mask.class, bb, offset, (Short512Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
super.intoMemorySegment0Template(Short512Mask.class, ms, offset, (Short512Mask) m);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -982,3 +979,4 @@ final class Short512Vector extends ShortVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class Short64Vector extends ShortVector {
|
||||
(Short64Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short64Vector compress(VectorMask<Short> m) {
|
||||
return (Short64Vector)
|
||||
super.compressTemplate(Short64Mask.class,
|
||||
(Short64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short64Vector expand(VectorMask<Short> m) {
|
||||
return (Short64Vector)
|
||||
super.expandTemplate(Short64Mask.class,
|
||||
(Short64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Short64Vector selectFrom(Vector<Short> v) {
|
||||
@ -653,6 +669,15 @@ final class Short64Vector extends ShortVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Mask compression. Calls VectorSupport.compressExpandOp with
// VECTOR_OP_MASK_COMPRESS, passing null followed by this mask.
// The trailing lambda compares VSPECIES.iota() lane-wise (LT) against
// m1.trueCount(); presumably iota() is the 0,1,2,... index vector, so the
// result would have its lowest trueCount() lanes set -- TODO confirm.
@Override
|
||||
@ForceInline
|
||||
public Short64Mask compress() {
|
||||
return (Short64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
Short64Vector.class, Short64Mask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -852,29 +877,15 @@ final class Short64Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
return super.fromMemorySegment0Template(Short64Mask.class, ms, offset, (Short64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -896,22 +907,8 @@ final class Short64Vector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
super.intoByteArray0Template(Short64Mask.class, a, offset, (Short64Mask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
super.intoByteBuffer0Template(Short64Mask.class, bb, offset, (Short64Mask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
super.intoMemorySegment0Template(Short64Mask.class, ms, offset, (Short64Mask) m);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -926,3 +923,4 @@ final class Short64Vector extends ShortVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -474,6 +474,22 @@ final class ShortMaxVector extends ShortVector {
|
||||
(ShortMaxVector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ShortMaxVector compress(VectorMask<Short> m) {
|
||||
return (ShortMaxVector)
|
||||
super.compressTemplate(ShortMaxMask.class,
|
||||
(ShortMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ShortMaxVector expand(VectorMask<Short> m) {
|
||||
return (ShortMaxVector)
|
||||
super.expandTemplate(ShortMaxMask.class,
|
||||
(ShortMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public ShortMaxVector selectFrom(Vector<Short> v) {
|
||||
@ -647,6 +663,15 @@ final class ShortMaxVector extends ShortVector {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Mask compression. Calls VectorSupport.compressExpandOp with
// VECTOR_OP_MASK_COMPRESS, passing null followed by this mask.
// The trailing lambda compares VSPECIES.iota() lane-wise (LT) against
// m1.trueCount(); presumably iota() is the 0,1,2,... index vector, so the
// result would have its lowest trueCount() lanes set -- TODO confirm.
@Override
|
||||
@ForceInline
|
||||
public ShortMaxMask compress() {
|
||||
return (ShortMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
ShortMaxVector.class, ShortMaxMask.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -846,29 +871,15 @@ final class ShortMaxVector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
return super.fromByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m); // specialize
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
return super.fromMemorySegment0Template(ShortMaxMask.class, ms, offset, (ShortMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -890,22 +901,8 @@ final class ShortMaxVector extends ShortVector {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m) {
|
||||
super.intoByteArray0Template(ShortMaxMask.class, a, offset, (ShortMaxMask) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m) {
|
||||
super.intoByteBuffer0Template(ShortMaxMask.class, bb, offset, (ShortMaxMask) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m) {
|
||||
super.intoMemorySegment0Template(ShortMaxMask.class, ms, offset, (ShortMaxMask) m);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -920,3 +917,4 @@ final class ShortMaxVector extends ShortVector {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -57,6 +57,8 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
|
||||
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
|
||||
|
||||
static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -351,6 +353,45 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
// Per-lane load callback for MemorySegment access with a long base offset.
// ldLongOp invokes apply(memory, offset, i) once per lane index i and stores
// the returned short into lane i of the result.
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
short apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
ShortVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
short[] res = new short[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
ShortVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Short> m,
|
||||
FLdLongOp f) {
|
||||
//short[] vec = vec();
|
||||
short[] res = new short[length()];
|
||||
boolean[] mbits = ((AbstractMask<Short>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static short memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * 2L);
|
||||
}
|
||||
|
||||
// Per-lane store callback over a generic memory object with an int offset.
// NOTE(review): presumably the int-offset counterpart of FStLongOp, with i the
// lane index and a the lane value -- confirm against the stOp callers.
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, short a);
|
||||
}
|
||||
@ -381,6 +422,40 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
}
|
||||
}
|
||||
|
||||
// Per-lane store callback for MemorySegment access with a long base offset.
// stLongOp invokes apply(memory, offset, i, a) with lane index i and that
// lane's value a.
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, short a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
short[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Short> m,
|
||||
FStLongOp f) {
|
||||
short[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<Short>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, short e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * 2L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -431,6 +506,36 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
return ((short)bits);
|
||||
}
|
||||
|
||||
static ShortVector expandHelper(Vector<Short> v, VectorMask<Short> m) {
|
||||
VectorSpecies<Short> vsp = m.vectorSpecies();
|
||||
ShortVector r = (ShortVector) vsp.zero();
|
||||
ShortVector vi = (ShortVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static ShortVector compressHelper(Vector<Short> v, VectorMask<Short> m) {
|
||||
VectorSpecies<Short> vsp = m.vectorSpecies();
|
||||
ShortVector r = (ShortVector) vsp.zero();
|
||||
ShortVector vi = (ShortVector) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -620,6 +725,16 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
v0.uOp(m, (i, a) -> (short) -a);
|
||||
case VECTOR_OP_ABS: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (short) Math.abs(a));
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (short) bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (short) numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (short) numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> reverse(a));
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> (short) Short.reverseBytes(a));
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
@ -1746,6 +1861,26 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
return lanewise(ABS);
|
||||
}
|
||||
|
||||
static int bitCount(short a) {
|
||||
return Integer.bitCount((int)a & 0xFFFF);
|
||||
}
|
||||
static int numberOfTrailingZeros(short a) {
|
||||
return a != 0 ? Integer.numberOfTrailingZeros(a) : 16;
|
||||
}
|
||||
static int numberOfLeadingZeros(short a) {
|
||||
return a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0;
|
||||
}
|
||||
|
||||
static short reverse(short a) {
|
||||
if (a == 0 || a == -1) return a;
|
||||
|
||||
short b = rotateLeft(a, 8);
|
||||
b = (short) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
|
||||
b = (short) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
|
||||
b = (short) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
|
||||
return b;
|
||||
}
|
||||
|
||||
// not (~)
|
||||
/**
|
||||
* Computes the bitwise logical complement ({@code ~})
|
||||
@ -2372,6 +2507,45 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
ShortVector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
ShortVector compress(VectorMask<Short> m);
|
||||
|
||||
// Shared implementation behind the species-specific compress(VectorMask)
// overrides. Validates the mask with m.check(masktype, this), then calls
// VectorSupport.compressExpandOp with VECTOR_OP_COMPRESS, this vector and m.
// The trailing lambda routes to compressHelper(v1, m1) -- presumably the
// non-intrinsic fallback; confirm against VectorSupport.
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Short>>
|
||||
ShortVector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (ShortVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
short.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
ShortVector expand(VectorMask<Short> m);
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<Short>>
|
||||
ShortVector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return (ShortVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
short.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -2784,90 +2958,6 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ShortVector fromByteArray(VectorSpecies<Short> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
ShortSpecies vsp = (ShortSpecies) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code short} (zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ShortVector fromByteArray(VectorSpecies<Short> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
ShortSpecies vsp = (ShortSpecies) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code short[]}
|
||||
* starting at an offset.
|
||||
@ -3167,44 +3257,49 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*2 < 0}
|
||||
* or {@code offset+N*2 >= bb.limit()}
|
||||
* or {@code offset+N*2 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ShortVector fromByteBuffer(VectorSpecies<Short> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
ShortVector fromMemorySegment(VectorSpecies<Short> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
ShortSpecies vsp = (ShortSpecies) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code short} (zero).
|
||||
@ -3215,13 +3310,11 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* ShortBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asShortBuffer();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* short[] ar = new short[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_SHORT.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* ShortVector r = ShortVector.fromArray(species, ar, 0);
|
||||
@ -3235,33 +3328,36 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
* the bytes of lane values.
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*2 < 0}
|
||||
* or {@code offset+N*2 >= bb.limit()}
|
||||
* or {@code offset+N*2 >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
ShortVector fromByteBuffer(VectorSpecies<Short> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
ShortVector fromMemorySegment(VectorSpecies<Short> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
ShortSpecies vsp = (ShortSpecies) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<Short>)m,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, ShortVector::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -3291,7 +3387,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -3437,7 +3533,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
|
||||
}
|
||||
|
||||
@ -3567,67 +3663,40 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
ShortSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<Short> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
ShortSpecies vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3661,7 +3730,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3678,7 +3747,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3694,7 +3763,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, charArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> (short) arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -3711,79 +3780,38 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, charArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> (short) arr_[off_ + i]));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
abstract
|
||||
ShortVector fromByteArray0(byte[] a, int offset);
|
||||
ShortVector fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
ShortVector fromByteArray0Template(byte[] a, int offset) {
|
||||
ShortVector fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ShortVector fromByteArray0(byte[] a, int offset, VectorMask<Short> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Short>>
|
||||
ShortVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, ShortVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
ShortVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
|
||||
ShortVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Short> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Short>>
|
||||
ShortVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
ShortVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.getShort(o + i * 2));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3802,7 +3830,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -3819,77 +3847,39 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<Short> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Short>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.putShort(o + i * 2, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, ShortVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Short> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Short> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<Short>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
ShortSpecies vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.putShort(o + i * 2, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, ShortVector::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -3907,7 +3897,7 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
a, charArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
|
||||
}
|
||||
|
||||
@ -3923,6 +3913,16 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
ShortSpecies vsp,
|
||||
VectorMask<Short> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<Short>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
ShortSpecies vsp,
|
||||
@ -4250,6 +4250,21 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
ShortVector ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
ShortVector ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<Short> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -4264,6 +4279,20 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<Short> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -4377,3 +4406,4 @@ public abstract class ShortVector extends AbstractVector<Short> {
|
||||
public static final VectorSpecies<Short> SPECIES_PREFERRED
|
||||
= (ShortSpecies) VectorSpecies.ofPreferred(short.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,8 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Arrays;
|
||||
|
||||
@ -763,11 +764,11 @@ import java.util.Arrays;
|
||||
* first vector lane value occupies the first position in memory, and so on,
|
||||
* up to the length of the vector. Further, the memory order of stored
|
||||
* vector lanes corresponds to increasing index values in a Java array or
|
||||
* in a {@link java.nio.ByteBuffer}.
|
||||
* in a {@link java.lang.foreign.MemorySegment}.
|
||||
*
|
||||
* <p> Byte order for lane storage is chosen such that the stored
|
||||
* vector values can be read or written as single primitive values,
|
||||
* within the array or buffer that holds the vector, producing the
|
||||
* within the array or segment that holds the vector, producing the
|
||||
* same values as the lane-wise values within the vector.
|
||||
* This fact is independent of the convenient fiction that lane values
|
||||
* inside of vectors are stored in little-endian order.
|
||||
@ -1039,6 +1040,12 @@ import java.util.Arrays;
|
||||
* can encode a mathematical permutation as well as many other
|
||||
* patterns of data movement.
|
||||
*
|
||||
* <li>The {@link #compress(VectorMask)} and {@link #expand(VectorMask)}
|
||||
* methods, which select up to {@code VLENGTH} lanes from an
|
||||
* input vector, and assemble them in lane order. The selection of lanes
|
||||
* is controlled by a {@code VectorMask}, with set lane elements mapping, by
|
||||
* compression or expansion in lane order, source lanes to destination lanes.
|
||||
*
|
||||
* </ul>
|
||||
* <p> Some vector operations are not lane-wise, but rather move data
|
||||
* across lane boundaries. Such operations are typically rare in SIMD
|
||||
@ -2689,6 +2696,46 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
*/
|
||||
public abstract Vector<E> rearrange(VectorShuffle<E> s, Vector<E> v);
|
||||
|
||||
/**
|
||||
* Compresses the lane elements of this vector selecting lanes
|
||||
* under the control of a specific mask.
|
||||
*
|
||||
* This is a cross-lane operation that compresses the lane
|
||||
* elements of this vector as selected by the specified mask.
|
||||
*
|
||||
* For each lane {@code N} of the mask, if the mask at
|
||||
* lane {@code N} is set, the element at lane {@code N}
|
||||
* of input vector is selected and stored into the output
|
||||
* vector contiguously starting from the lane {@code 0}.
|
||||
* All the upper remaining lanes, if any, of the output
|
||||
* vector are set to zero.
|
||||
*
|
||||
* @param m the mask controlling the compression
|
||||
* @return the compressed lane elements of this vector
|
||||
* @since 19
|
||||
*/
|
||||
public abstract Vector<E> compress(VectorMask<E> m);
|
||||
|
||||
/**
|
||||
* Expands the lane elements of this vector
|
||||
* under the control of a specific mask.
|
||||
*
|
||||
* This is a cross-lane operation that expands the contiguous lane
|
||||
* elements of this vector into lanes of an output vector
|
||||
* as selected by the specified mask.
|
||||
*
|
||||
* For each lane {@code N} of the mask, if the mask at
|
||||
* lane {@code N} is set, the next contiguous element of input vector
|
||||
* starting from lane {@code 0} is selected and stored into the output
|
||||
* vector at lane {@code N}.
|
||||
* All the remaining lanes, if any, of the output vector are set to zero.
|
||||
*
|
||||
* @param m the mask controlling the expansion
|
||||
* @return the expanded lane elements of this vector
|
||||
* @since 19
|
||||
*/
|
||||
public abstract Vector<E> expand(VectorMask<E> m);
|
||||
|
||||
/**
|
||||
* Using index values stored in the lanes of this vector,
|
||||
* assemble values stored in second vector {@code v}.
|
||||
@ -2854,9 +2901,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
* implementation costs.
|
||||
*
|
||||
* <p> The method behaves as if this vector is stored into a byte
|
||||
* buffer or array using little-endian byte ordering and then the
|
||||
* desired vector is loaded from the same byte buffer or array
|
||||
* using the same ordering.
|
||||
* array using little-endian byte ordering and then the desired vector is loaded from the same byte
|
||||
* array using the same ordering.
|
||||
*
|
||||
* <p> The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
@ -2865,15 +2911,15 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
* int M = (domSize > ranSize ? domSize / ranSize : ranSize / domSize);
|
||||
* assert Math.abs(part) < M;
|
||||
* assert (part == 0) || (part > 0) == (domSize > ranSize);
|
||||
* byte[] ra = new byte[Math.max(domSize, ranSize)];
|
||||
* MemorySegment ms = MemorySegment.ofArray(new byte[Math.max(domSize, ranSize)]);
|
||||
* if (domSize > ranSize) { // expansion
|
||||
* this.intoByteArray(ra, 0, ByteOrder.native());
|
||||
* this.intoMemorySegment(ms, 0, ByteOrder.nativeOrder());
|
||||
* int origin = part * ranSize;
|
||||
* return species.fromByteArray(ra, origin, ByteOrder.native());
|
||||
* return species.fromMemorySegment(ms, origin, ByteOrder.nativeOrder());
|
||||
* } else { // contraction or size-invariant
|
||||
* int origin = (-part) * domSize;
|
||||
* this.intoByteArray(ra, origin, ByteOrder.native());
|
||||
* return species.fromByteArray(ra, 0, ByteOrder.native());
|
||||
* this.intoMemorySegment(ms, origin, ByteOrder.nativeOrder());
|
||||
* return species.fromMemorySegment(ms, 0, ByteOrder.nativeOrder());
|
||||
* }
|
||||
* }</pre>
|
||||
*
|
||||
@ -2910,8 +2956,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
*
|
||||
* @return a {@code ByteVector} with the same shape and information content
|
||||
* @see Vector#reinterpretShape(VectorSpecies,int)
|
||||
* @see IntVector#intoByteArray(byte[], int, ByteOrder)
|
||||
* @see FloatVector#intoByteArray(byte[], int, ByteOrder)
|
||||
* @see IntVector#intoMemorySegment(java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
|
||||
* @see FloatVector#intoMemorySegment(java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
|
||||
* @see VectorSpecies#withLanes(Class)
|
||||
*/
|
||||
public abstract ByteVector reinterpretAsBytes();
|
||||
@ -3319,8 +3365,8 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
//Array stores
|
||||
|
||||
/**
|
||||
* Stores this vector into a byte array starting at an offset
|
||||
* using explicit byte order.
|
||||
* Stores this vector into a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset using explicit byte order.
|
||||
* <p>
|
||||
* Bytes are extracted from primitive lane elements according
|
||||
* to the specified byte ordering.
|
||||
@ -3328,88 +3374,33 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it calls
|
||||
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* intoByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = maskAll(true);
|
||||
* intoByteBuffer(bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
public abstract void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo);
|
||||
|
||||
/**
|
||||
* Stores this vector into a byte array starting at an offset
|
||||
* using explicit byte order and a mask.
|
||||
* <p>
|
||||
* Bytes are extracted from primitive lane elements according
|
||||
* to the specified byte ordering.
|
||||
* The lanes are stored according to their
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it calls
|
||||
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* intoByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* intoByteBuffer(bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
public abstract void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<E> m);
|
||||
|
||||
/**
|
||||
* Stores this vector into a byte buffer starting at an offset
|
||||
* using explicit byte order.
|
||||
* <p>
|
||||
* Bytes are extracted from primitive lane elements according
|
||||
* to the specified byte ordering.
|
||||
* The lanes are stored according to their
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it calls
|
||||
* {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* intoByteBuffer()} as follows:
|
||||
* {@link #intoMemorySegment(MemorySegment,long,ByteOrder,VectorMask)
|
||||
* intoMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = maskAll(true);
|
||||
* intoByteBuffer(bb, offset, bo, m);
|
||||
* intoMemorySegment(ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the array
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > bb.limit()}
|
||||
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws java.nio.ReadOnlyBufferException
|
||||
* if the byte buffer is read-only
|
||||
* @throws UnsupportedOperationException
|
||||
* if the memory segment is read-only
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
public abstract void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo);
|
||||
public abstract void intoMemorySegment(MemorySegment ms, long offset, ByteOrder bo);
|
||||
|
||||
/**
|
||||
* Stores this vector into a byte buffer starting at an offset
|
||||
* using explicit byte order and a mask.
|
||||
* Stores this vector into a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset using explicit byte order and a mask.
|
||||
* <p>
|
||||
* Bytes are extracted from primitive lane elements according
|
||||
* to the specified byte ordering.
|
||||
@ -3417,28 +3408,18 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior, where
|
||||
* the primitive element type is not of {@code byte},
|
||||
* {@code EBuffer} is the primitive buffer type, {@code ETYPE} is the
|
||||
* {@code JAVA_E} is the layout of the primitive element type, {@code ETYPE} is the
|
||||
* primitive element type, and {@code EVector} is the primitive
|
||||
* vector type for this vector:
|
||||
* <pre>{@code
|
||||
* EBuffer eb = bb.duplicate()
|
||||
* .position(offset)
|
||||
* .order(bo).asEBuffer();
|
||||
* ETYPE[] a = this.toArray();
|
||||
* var slice = ms.asSlice(offset);
|
||||
* for (int n = 0; n < a.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* eb.put(n, a[n]);
|
||||
* slice.setAtIndex(ValueLayout.JAVA_E.withBitAlignment(8), n, a[n]);
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* When the primitive element type is of {@code byte} the primitive
|
||||
* byte buffer is obtained as follows, where operation on the buffer
|
||||
* remains the same as in the prior pseudocode:
|
||||
* <pre>{@code
|
||||
* ByteBuffer eb = bb.duplicate()
|
||||
* .position(offset);
|
||||
* }</pre>
|
||||
*
|
||||
* @implNote
|
||||
* This operation is likely to be more efficient if
|
||||
@ -3451,20 +3432,25 @@ public abstract class Vector<E> extends jdk.internal.vm.vector.VectorSupport.Vec
|
||||
* {@code byte}, the byte order argument is
|
||||
* ignored.
|
||||
*
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the array
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > bb.limit()}
|
||||
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws java.nio.ReadOnlyBufferException
|
||||
* if the byte buffer is read-only
|
||||
* @throws UnsupportedOperationException
|
||||
* if the memory segment is read-only
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
public abstract void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo, VectorMask<E> m);
|
||||
public abstract void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo, VectorMask<E> m);
|
||||
|
||||
/**
|
||||
* Returns a packed array containing all the lane values.
|
||||
|
||||
@ -54,6 +54,16 @@ import java.util.Objects;
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
static long checkFromIndexSize(long ix, long vlen, long length) {
|
||||
switch (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK) {
|
||||
case 0: return ix; // no range check
|
||||
case 1: return Objects.checkFromIndexSize(ix, vlen, length);
|
||||
case 2: return Objects.checkIndex(ix, length - (vlen - 1));
|
||||
default: throw new InternalError();
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
static IntVector checkIndex(IntVector vix, int length) {
|
||||
switch (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK) {
|
||||
@ -92,9 +102,30 @@ import java.util.Objects;
|
||||
if (index >= 0) {
|
||||
return index - (index % size);
|
||||
} else {
|
||||
return index - Math.floorMod(index, Math.abs(size));
|
||||
return index - Math.floorMod(index, size);
|
||||
}
|
||||
}
|
||||
|
||||
// If the index is not already a multiple of size,
|
||||
// round it down to the next smaller multiple of size.
|
||||
// It is an error if size is less than zero.
|
||||
@ForceInline
|
||||
static long roundDown(long index, int size) {
|
||||
if ((size & (size - 1)) == 0) {
|
||||
// Size is zero or a power of two, so we got this.
|
||||
return index & ~(size - 1);
|
||||
} else {
|
||||
return roundDownNPOT(index, size);
|
||||
}
|
||||
}
|
||||
private static long roundDownNPOT(long index, int size) {
|
||||
if (index >= 0) {
|
||||
return index - (index % size);
|
||||
} else {
|
||||
return index - Math.floorMod(index, size);
|
||||
}
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
static int wrapToRange(int index, int size) {
|
||||
if ((size & (size - 1)) == 0) {
|
||||
|
||||
@ -210,7 +210,7 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
|
||||
bits, (long) offset + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
|
||||
bits, offset, vsp,
|
||||
(c, idx, s)
|
||||
-> s.opm(n -> c[idx + n]));
|
||||
-> s.opm(n -> c[((int )idx) + n]));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -471,6 +471,39 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
|
||||
*/
|
||||
public abstract VectorMask<E> indexInRange(int offset, int limit);
|
||||
|
||||
/**
|
||||
* Removes lanes numbered {@code N} from this mask where the
|
||||
* adjusted index {@code N+offset}, is not in the range
|
||||
* {@code [0..limit-1]}.
|
||||
*
|
||||
* <p> In all cases the series of set and unset lanes is assigned
|
||||
* as if by using infinite precision or {@code VLENGTH-}saturating
|
||||
* additions or subtractions, without overflow or wrap-around.
|
||||
*
|
||||
* @apiNote
|
||||
*
|
||||
* This method performs a SIMD emulation of the check performed by
|
||||
* {@link Objects#checkIndex(long,long)}, on the index numbers in
|
||||
* the range {@code [offset..offset+VLENGTH-1]}. If an exception
|
||||
* is desired, the resulting mask can be compared with the
|
||||
* original mask; if they are not equal, then at least one lane
|
||||
* was out of range, and exception processing can be performed.
|
||||
*
|
||||
* <p> A mask which is a series of {@code N} set lanes followed by
|
||||
* a series of unset lanes can be obtained by calling
|
||||
* {@code allTrue.indexInRange(0, N)}, where {@code allTrue} is a
|
||||
* mask of all true bits. A mask of {@code N1} unset lanes
|
||||
* followed by {@code N2} set lanes can be obtained by calling
|
||||
* {@code allTrue.indexInRange(-N1, N2)}.
|
||||
*
|
||||
* @param offset the starting index
|
||||
* @param limit the upper-bound (exclusive) of index range
|
||||
* @return the original mask, with out-of-range lanes unset
|
||||
* @see VectorSpecies#indexInRange(long, long)
|
||||
* @since 19
|
||||
*/
|
||||
public abstract VectorMask<E> indexInRange(long offset, long limit);
|
||||
|
||||
/**
|
||||
* Returns a vector representation of this mask, the
|
||||
* lane bits of which are set or unset in correspondence
|
||||
@ -621,6 +654,18 @@ public abstract class VectorMask<E> extends jdk.internal.vm.vector.VectorSupport
|
||||
return Objects.hash(vectorSpecies(), Arrays.hashCode(toArray()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Compresses set lanes from this mask.
|
||||
*
|
||||
* Returns a mask which is a series of {@code N} set lanes
|
||||
* followed by a series of unset lanes, where {@code N} is
|
||||
* the true count of this mask.
|
||||
*
|
||||
* @return the compressed mask of this mask
|
||||
* @since 19
|
||||
*/
|
||||
public abstract VectorMask<E> compress();
|
||||
|
||||
// ==== JROSE NAME CHANGES ====
|
||||
|
||||
// TYPE CHANGED
|
||||
|
||||
@ -452,6 +452,26 @@ public abstract class VectorOperators {
|
||||
public static final Unary ABS = unary("ABS", "abs", VectorSupport.VECTOR_OP_ABS, VO_ALL);
|
||||
/** Produce {@code -a}. */
|
||||
public static final Unary NEG = unary("NEG", "-a", VectorSupport.VECTOR_OP_NEG, VO_ALL|VO_SPECIAL);
|
||||
/** Produce {@code bitCount(a)}.
|
||||
* @since 19
|
||||
*/
|
||||
public static final Unary BIT_COUNT = unary("BIT_COUNT", "bitCount", VectorSupport.VECTOR_OP_BIT_COUNT, VO_NOFP);
|
||||
/** Produce {@code numberOfTrailingZeros(a)}.
|
||||
* @since 19
|
||||
*/
|
||||
public static final Unary TRAILING_ZEROS_COUNT = unary("TRAILING_ZEROS_COUNT", "numberOfTrailingZeros", VectorSupport.VECTOR_OP_TZ_COUNT, VO_NOFP);
|
||||
/** Produce {@code numberOfLeadingZeros(a)}.
|
||||
* @since 19
|
||||
*/
|
||||
public static final Unary LEADING_ZEROS_COUNT = unary("LEADING_ZEROS_COUNT", "numberOfLeadingZeros", VectorSupport.VECTOR_OP_LZ_COUNT, VO_NOFP);
|
||||
/** Produce {@code reverse(a)}.
|
||||
* @since 19
|
||||
*/
|
||||
public static final Unary REVERSE = unary("REVERSE", "reverse", VectorSupport.VECTOR_OP_REVERSE, VO_NOFP);
|
||||
/** Produce {@code reverseBytes(a)}.
|
||||
* @since 19
|
||||
*/
|
||||
public static final Unary REVERSE_BYTES = unary("REVERSE_BYTES", "reverseBytes", VectorSupport.VECTOR_OP_REVERSE_BYTES, VO_NOFP);
|
||||
|
||||
/** Produce {@code sin(a)}. Floating only.
|
||||
* Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
|
||||
@ -556,6 +576,14 @@ public abstract class VectorOperators {
|
||||
public static final /*bitwise*/ Binary ROL = binary("ROL", "rotateLeft", VectorSupport.VECTOR_OP_LROTATE, VO_SHIFT);
|
||||
/** Produce {@code rotateRight(a,n)}. Integral only. */
|
||||
public static final /*bitwise*/ Binary ROR = binary("ROR", "rotateRight", VectorSupport.VECTOR_OP_RROTATE, VO_SHIFT);
|
||||
/** Produce {@code compress(a,n)}. Integral, {@code int} and {@code long}, only.
|
||||
* @since 19
|
||||
*/
|
||||
public static final /*bitwise*/ Binary COMPRESS_BITS = binary("COMPRESS_BITS", "compressBits", VectorSupport.VECTOR_OP_COMPRESS_BITS, VO_NOFP);
|
||||
/** Produce {@code expand(a,n)}. Integral, {@code int} and {@code long}, only.
|
||||
* @since 19
|
||||
*/
|
||||
public static final /*bitwise*/ Binary EXPAND_BITS = binary("EXPAND_BITS", "expandBits", VectorSupport.VECTOR_OP_EXPAND_BITS, VO_NOFP);
|
||||
|
||||
/** Produce {@code atan2(a,b)}. Floating only.
|
||||
* Not guaranteed to be semi-monotonic. See section "Operations on floating point vectors" above
|
||||
|
||||
@ -24,6 +24,8 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
@ -149,11 +151,37 @@ public interface VectorSpecies<E> {
|
||||
* @return the largest multiple of the vector length not greater
|
||||
* than the given length
|
||||
* @throws IllegalArgumentException if the {@code length} is
|
||||
negative and the result would overflow to a positive value
|
||||
* negative and the result would overflow to a positive value
|
||||
* @see Math#floorMod(int, int)
|
||||
*/
|
||||
int loopBound(int length);
|
||||
|
||||
/**
|
||||
* Loop control function which returns the largest multiple of
|
||||
* {@code VLENGTH} that is less than or equal to the given
|
||||
* {@code length} value.
|
||||
* Here, {@code VLENGTH} is the result of {@code this.length()},
|
||||
* and {@code length} is interpreted as a number of lanes.
|
||||
* The resulting value {@code R} satisfies this inequality:
|
||||
* <pre>{@code R <= length < R+VLENGTH}
|
||||
* </pre>
|
||||
* <p> Specifically, this method computes
|
||||
* {@code length - floorMod(length, VLENGTH)}, where
|
||||
* {@link Math#floorMod(long,int) floorMod} computes a remainder
|
||||
* value by rounding its quotient toward negative infinity.
|
||||
* As long as {@code VLENGTH} is a power of two, then the result
|
||||
* is also equal to {@code length & ~(VLENGTH - 1)}.
|
||||
*
|
||||
* @param length the input length
|
||||
* @return the largest multiple of the vector length not greater
|
||||
* than the given length
|
||||
* @throws IllegalArgumentException if the {@code length} is
|
||||
* negative and the result would overflow to a positive value
|
||||
* @see Math#floorMod(long, int)
|
||||
* @since 19
|
||||
*/
|
||||
long loopBound(long length);
|
||||
|
||||
/**
|
||||
* Returns a mask of this species where only
|
||||
* the lanes at index N such that the adjusted index
|
||||
@ -171,6 +199,24 @@ public interface VectorSpecies<E> {
|
||||
*/
|
||||
VectorMask<E> indexInRange(int offset, int limit);
|
||||
|
||||
/**
|
||||
* Returns a mask of this species where only
|
||||
* the lanes at index N such that the adjusted index
|
||||
* {@code N+offset} is in the range {@code [0..limit-1]}
|
||||
* are set.
|
||||
*
|
||||
* <p>
|
||||
* This method returns the value of the expression
|
||||
* {@code maskAll(true).indexInRange(offset, limit)}
|
||||
*
|
||||
* @param offset the starting index
|
||||
* @param limit the upper-bound (exclusive) of index range
|
||||
* @return a mask with out-of-range lanes unset
|
||||
* @see VectorMask#indexInRange(long, long)
|
||||
* @since 19
|
||||
*/
|
||||
VectorMask<E> indexInRange(long offset, long limit);
|
||||
|
||||
/**
|
||||
* Checks that this species has the given element type,
|
||||
* and returns this species unchanged.
|
||||
@ -433,31 +479,31 @@ public interface VectorSpecies<E> {
|
||||
// Defined when ETYPE is known.
|
||||
|
||||
/**
|
||||
* Loads a vector of this species from a byte array starting
|
||||
* at an offset.
|
||||
* Loads a vector of this species from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* Equivalent to
|
||||
* {@code IntVector.fromByteArray(this,a,offset,bo)}
|
||||
* or an equivalent {@code fromByteArray} method,
|
||||
* {@code IntVector.fromMemorySegment(this,ms,offset,bo)},
|
||||
* on the vector type corresponding to
|
||||
* this species.
|
||||
*
|
||||
* @param a a byte array
|
||||
* @param offset the index of the first byte to load
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector of the given species filled from the byte array
|
||||
* @return a vector of the given species filled from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @see IntVector#fromByteArray(VectorSpecies,byte[],int,ByteOrder)
|
||||
* @see FloatVector#fromByteArray(VectorSpecies,byte[],int,ByteOrder)
|
||||
* @see IntVector#fromMemorySegment(VectorSpecies, java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
|
||||
* @see FloatVector#fromMemorySegment(VectorSpecies, java.lang.foreign.MemorySegment, long, java.nio.ByteOrder)
|
||||
* @since 19
|
||||
*/
|
||||
Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo);
|
||||
Vector<E> fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo);
|
||||
|
||||
/**
|
||||
* Returns a mask of this species
|
||||
|
||||
@ -24,14 +24,14 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ReadOnlyBufferException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||
import jdk.internal.misc.ScopedMemoryAccess;
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
@ -61,6 +61,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
|
||||
#end[FP]
|
||||
|
||||
static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withBitAlignment(8);
|
||||
|
||||
@ForceInline
|
||||
static int opCode(Operator op) {
|
||||
return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
|
||||
@ -355,6 +357,45 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
// Callback used by the ldLongOp() scalar load loops: returns the element
// for lane index i, given the memory segment and the long offset that
// were passed to ldLongOp().
/*package-private*/
|
||||
interface FLdLongOp {
|
||||
$type$ apply(MemorySegment memory, long offset, int i);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
//dummy; no vec = vec();
|
||||
$type$[] res = new $type$[length()];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<$Boxtype$> m,
|
||||
FLdLongOp f) {
|
||||
//$type$[] vec = vec();
|
||||
$type$[] res = new $type$[length()];
|
||||
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
if (mbits[i]) {
|
||||
res[i] = f.apply(memory, offset, i);
|
||||
}
|
||||
}
|
||||
return vectorFactory(res);
|
||||
}
|
||||
|
||||
static $type$ memorySegmentGet(MemorySegment ms, long o, int i) {
|
||||
return ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L);
|
||||
}
|
||||
|
||||
// Store callback: receives a memory object of type M, an int offset,
// a lane index i and the element value a for that lane.
// NOTE(review): presumably the int-offset counterpart of FStLongOp, used by
// store helpers outside this view -- confirm against the callers.
interface FStOp<M> {
|
||||
void apply(M memory, int offset, int i, $type$ a);
|
||||
}
|
||||
@ -385,6 +426,40 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
}
|
||||
}
|
||||
|
||||
// Callback used by the stLongOp() scalar store loops: receives the memory
// segment and long offset that were passed to stLongOp(), plus the lane
// index i and the element value a of that lane.
interface FStLongOp {
|
||||
void apply(MemorySegment memory, long offset, int i, $type$ a);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
FStLongOp f) {
|
||||
$type$[] vec = vec();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<$Boxtype$> m,
|
||||
FStLongOp f) {
|
||||
$type$[] vec = vec();
|
||||
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
|
||||
for (int i = 0; i < vec.length; i++) {
|
||||
if (mbits[i]) {
|
||||
f.apply(memory, offset, i, vec[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void memorySegmentSet(MemorySegment ms, long o, int i, $type$ e) {
|
||||
ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, e);
|
||||
}
|
||||
|
||||
// Binary test
|
||||
|
||||
/*package-private*/
|
||||
@ -445,6 +520,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
|
||||
}
|
||||
|
||||
static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
|
||||
VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
|
||||
$abstractvectortype$ r = ($abstractvectortype$) vsp.zero();
|
||||
$abstractvectortype$ vi = ($abstractvectortype$) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(i, vi.lane(j++));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static $abstractvectortype$ compressHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
|
||||
VectorSpecies<$Boxtype$> vsp = m.vectorSpecies();
|
||||
$abstractvectortype$ r = ($abstractvectortype$) vsp.zero();
|
||||
$abstractvectortype$ vi = ($abstractvectortype$) v;
|
||||
if (m.allTrue()) {
|
||||
return vi;
|
||||
}
|
||||
for (int i = 0, j = 0; i < vsp.length(); i++) {
|
||||
if (m.laneIsSet(i)) {
|
||||
r = r.withLane(j++, vi.lane(i));
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Static factories (other than memory operations)
|
||||
|
||||
// Note: A surprising behavior in javadoc
|
||||
@ -646,6 +751,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
v0.uOp(m, (i, a) -> ($type$) -a);
|
||||
case VECTOR_OP_ABS: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
|
||||
#if[!FP]
|
||||
#if[intOrLong]
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
|
||||
#else[intOrLong]
|
||||
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) bitCount(a));
|
||||
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
|
||||
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
|
||||
case VECTOR_OP_REVERSE: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> reverse(a));
|
||||
#end[intOrLong]
|
||||
#if[BITWISE]
|
||||
#if[byte]
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> a);
|
||||
#else[byte]
|
||||
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
|
||||
#end[byte]
|
||||
#end[BITWISE]
|
||||
#end[!FP]
|
||||
#if[FP]
|
||||
case VECTOR_OP_SIN: return (v0, m) ->
|
||||
v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
|
||||
@ -839,6 +974,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
|
||||
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
|
||||
#if[intOrLong]
|
||||
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.compress(a, n));
|
||||
case VECTOR_OP_EXPAND_BITS: return (v0, v1, vm) ->
|
||||
v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.expand(a, n));
|
||||
#end[intOrLong]
|
||||
#end[BITWISE]
|
||||
#if[FP]
|
||||
case VECTOR_OP_OR: return (v0, v1, vm) ->
|
||||
@ -1987,6 +2128,56 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
return lanewise(ABS);
|
||||
}
|
||||
|
||||
#if[!FP]
|
||||
#if[!intOrLong]
|
||||
/**
 * Counts the one-bits of {@code a}, looking only at the low 16 bits
 * (short variant) or low 8 bits (otherwise) of its widened value.
 */
static int bitCount($type$ a) {
    // Mask away the sign-extension bits before counting.
#if[short]
    final int laneBits = (int) a & 0xFFFF;
#else[short]
    final int laneBits = (int) a & 0xFF;
#end[short]
    return Integer.bitCount(laneBits);
}
|
||||
#end[!intOrLong]
|
||||
#end[!FP]
|
||||
#if[!FP]
|
||||
#if[!intOrLong]
|
||||
/**
 * Counts the trailing zero bits of {@code a}. A zero input yields 16 in
 * the short variant and 8 otherwise.
 */
static int numberOfTrailingZeros($type$ a) {
    if (a == 0) {
        // Zero is answered explicitly instead of using the 32-bit count.
#if[short]
        return 16;
#else[short]
        return 8;
#end[short]
    }
    return Integer.numberOfTrailingZeros(a);
}
|
||||
#end[!intOrLong]
|
||||
#end[!FP]
|
||||
#if[!FP]
|
||||
#if[!intOrLong]
|
||||
/**
 * Counts the leading zero bits of {@code a}. Negative values yield 0;
 * otherwise the 32-bit count is reduced by 16 (short variant) or 24.
 */
static int numberOfLeadingZeros($type$ a) {
    if (a < 0) {
        // The sign bit is set, so there are no leading zeros.
        return 0;
    }
    final int wideCount = Integer.numberOfLeadingZeros(a);
#if[short]
    return wideCount - 16;
#else[short]
    return wideCount - 24;
#end[short]
}
|
||||
|
||||
/**
 * Reverses the order of the bits of {@code a}.
 * The values 0 and -1 are returned unchanged.
 */
static $type$ reverse($type$ a) {
    if (a == 0 || a == -1) {
        return a;
    }

    // Rotate, then exchange progressively wider groups of bits. Each step
    // masks its input, so working in an int and narrowing once at the end
    // gives the same bits as narrowing after every step.
#if[short]
    int bits = rotateLeft(a, 8);
    bits = ((bits & 0x5555) << 1) | ((bits & 0xAAAA) >>> 1);
    bits = ((bits & 0x3333) << 2) | ((bits & 0xCCCC) >>> 2);
    bits = ((bits & 0x0F0F) << 4) | ((bits & 0xF0F0) >>> 4);
#else[short]
    int bits = rotateLeft(a, 4);
    bits = ((bits & 0x55) << 1) | ((bits & 0xAA) >>> 1);
    bits = ((bits & 0x33) << 2) | ((bits & 0xCC) >>> 2);
#end[short]
    return ($type$) bits;
}
|
||||
#end[!intOrLong]
|
||||
#end[!FP]
|
||||
|
||||
#if[BITWISE]
|
||||
// not (~)
|
||||
/**
|
||||
@ -2695,6 +2886,45 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
$Type$Vector::toShuffle0);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
$Type$Vector compress(VectorMask<$Boxtype$> m);
|
||||
|
||||
// Shared implementation behind compress(): validates the mask with
// m.check(masktype, this), then calls VectorSupport.compressExpandOp with
// VECTOR_OP_COMPRESS, this vector's class, the mask class, the element
// class and the lane count.
// The trailing lambda delegates to compressHelper(); presumably it is the
// Java fallback used when the operation is not intrinsified -- confirm
// against VectorSupport.compressExpandOp.
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<$Boxtype$>>
|
||||
$Type$Vector compressTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
|
||||
$type$.class, length(), this, m,
|
||||
(v1, m1) -> compressHelper(v1, m1));
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
public abstract
|
||||
$Type$Vector expand(VectorMask<$Boxtype$> m);
|
||||
|
||||
// Shared implementation behind expand(): validates the mask with
// m.check(masktype, this), then calls VectorSupport.compressExpandOp with
// VECTOR_OP_EXPAND, this vector's class, the mask class, the element
// class and the lane count.
// The trailing lambda delegates to expandHelper(); presumably it is the
// Java fallback used when the operation is not intrinsified -- confirm
// against VectorSupport.compressExpandOp.
/*package-private*/
|
||||
@ForceInline
|
||||
final
|
||||
<M extends AbstractMask<$Boxtype$>>
|
||||
$Type$Vector expandTemplate(Class<M> masktype, M m) {
|
||||
m.check(masktype, this);
|
||||
return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
|
||||
$type$.class, length(), this, m,
|
||||
(v1, m1) -> expandHelper(v1, m1));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@ -3302,90 +3532,6 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
}
|
||||
#end[double]
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
$abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
|
||||
$Type$Species vsp = ($Type$Species) species;
|
||||
return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a byte array starting at an offset
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code $type$} ({#if[FP]?positive }zero).
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* <pre>{@code
|
||||
* var bb = ByteBuffer.wrap(a);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param a the byte array
|
||||
* @param offset the offset into the array
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte array
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*ESIZE < 0}
|
||||
* or {@code offset+(N+1)*ESIZE > a.length}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
$abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
|
||||
byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
$Type$Species vsp = ($Type$Species) species;
|
||||
if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
|
||||
ByteBuffer wb = wrapper(a, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from an array of type {@code $type$[]}
|
||||
* starting at an offset.
|
||||
@ -3917,44 +4063,49 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
#end[byte]
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer.
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment.
|
||||
* Bytes are composed into primitive lane elements according
|
||||
* to the specified byte order.
|
||||
* The vector is arranged into lanes according to
|
||||
* <a href="Vector.html#lane-order">memory ordering</a>.
|
||||
* <p>
|
||||
* This method behaves as if it returns the result of calling
|
||||
* {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
|
||||
* fromByteBuffer()} as follows:
|
||||
* {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
|
||||
* fromMemorySegment()} as follows:
|
||||
* <pre>{@code
|
||||
* var m = species.maskAll(true);
|
||||
* return fromByteBuffer(species, bb, offset, bo, m);
|
||||
* return fromMemorySegment(species, ms, offset, bo, m);
|
||||
* }</pre>
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*$sizeInBytes$ < 0}
|
||||
* or {@code offset+N*$sizeInBytes$ >= bb.limit()}
|
||||
* or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
$abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
|
||||
$abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
|
||||
$Type$Species vsp = ($Type$Species) species;
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a vector from a {@linkplain ByteBuffer byte buffer}
|
||||
* starting at an offset into the byte buffer
|
||||
* Loads a vector from a {@linkplain MemorySegment memory segment}
|
||||
* starting at an offset into the memory segment
|
||||
* and using a mask.
|
||||
* Lanes where the mask is unset are filled with the default
|
||||
* value of {@code $type$} ({#if[FP]?positive }zero).
|
||||
@ -3965,15 +4116,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
* <p>
|
||||
* The following pseudocode illustrates the behavior:
|
||||
* <pre>{@code
|
||||
* $Type$Buffer eb = bb.duplicate()
|
||||
* .position(offset){#if[byte]?;}
|
||||
#if[!byte]
|
||||
* .order(bo).as$Type$Buffer();
|
||||
#end[!byte]
|
||||
* var slice = ms.asSlice(offset);
|
||||
* $type$[] ar = new $type$[species.length()];
|
||||
* for (int n = 0; n < ar.length; n++) {
|
||||
* if (m.laneIsSet(n)) {
|
||||
* ar[n] = eb.get(n);
|
||||
* ar[n] = slice.getAtIndex(ValueLayout.JAVA_$TYPE$.withBitAlignment(8), n);
|
||||
* }
|
||||
* }
|
||||
* $abstractvectortype$ r = $abstractvectortype$.fromArray(species, ar, 0);
|
||||
@ -3991,33 +4138,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
#end[!byte]
|
||||
*
|
||||
* @param species species of desired vector
|
||||
* @param bb the byte buffer
|
||||
* @param offset the offset into the byte buffer
|
||||
* @param ms the memory segment
|
||||
* @param offset the offset into the memory segment
|
||||
* @param bo the intended byte order
|
||||
* @param m the mask controlling lane selection
|
||||
* @return a vector loaded from a byte buffer
|
||||
* @return a vector loaded from the memory segment
|
||||
* @throws IndexOutOfBoundsException
|
||||
* if {@code offset+N*$sizeInBytes$ < 0}
|
||||
* or {@code offset+N*$sizeInBytes$ >= bb.limit()}
|
||||
* or {@code offset+N*$sizeInBytes$ >= ms.byteSize()}
|
||||
* for any lane {@code N} in the vector
|
||||
* where the mask is set
|
||||
* @throws IllegalArgumentException if the memory segment is a heap segment that is
|
||||
* not backed by a {@code byte[]} array.
|
||||
* @throws IllegalStateException if the memory segment's session is not alive,
|
||||
* or if access occurs from a thread other than the thread owning the session.
|
||||
* @since 19
|
||||
*/
|
||||
@ForceInline
|
||||
public static
|
||||
$abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
|
||||
ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
$abstractvectortype$ fromMemorySegment(VectorSpecies<$Boxtype$> species,
|
||||
MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
$Type$Species vsp = ($Type$Species) species;
|
||||
if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
|
||||
if (offset >= 0 && offset <= (ms.byteSize() - species.vectorByteSize())) {
|
||||
return vsp.dummyVector().fromMemorySegment0(ms, offset, m).maybeSwap(bo);
|
||||
}
|
||||
|
||||
// FIXME: optimize
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
|
||||
ByteBuffer wb = wrapper(bb, bo);
|
||||
return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, ms.byteSize());
|
||||
return vsp.ldLongOp(ms, offset, m, $abstractvectortype$::memorySegmentGet);
|
||||
}
|
||||
|
||||
// Memory store operations
|
||||
@ -4047,7 +4197,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -4264,7 +4414,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
this,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
|
||||
}
|
||||
|
||||
@ -4423,7 +4573,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
normalized,
|
||||
a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
|
||||
}
|
||||
|
||||
@ -4562,67 +4712,40 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo) {
|
||||
offset = checkFromIndexSize(offset, byteSize(), a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteArray(byte[] a, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteArray(a, offset, bo);
|
||||
} else {
|
||||
$Type$Species vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
|
||||
maybeSwap(bo).intoByteArray0(a, offset, m);
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo) {
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
|
||||
offset = checkFromIndexSize(offset, byteSize(), ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
* @since 19
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo) {
|
||||
if (ScopedMemoryAccess.isReadOnly(bb)) {
|
||||
throw new ReadOnlyBufferException();
|
||||
}
|
||||
offset = checkFromIndexSize(offset, byteSize(), bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc} <!--workaround-->
|
||||
*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
void intoByteBuffer(ByteBuffer bb, int offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
void intoMemorySegment(MemorySegment ms, long offset,
|
||||
ByteOrder bo,
|
||||
VectorMask<$Boxtype$> m) {
|
||||
if (m.allTrue()) {
|
||||
intoByteBuffer(bb, offset, bo);
|
||||
intoMemorySegment(ms, offset, bo);
|
||||
} else {
|
||||
if (bb.isReadOnly()) {
|
||||
throw new ReadOnlyBufferException();
|
||||
if (ms.isReadOnly()) {
|
||||
throw new UnsupportedOperationException("Attempt to write a read-only segment");
|
||||
}
|
||||
$Type$Species vsp = vspecies();
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
|
||||
maybeSwap(bo).intoByteBuffer0(bb, offset, m);
|
||||
checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, ms.byteSize());
|
||||
maybeSwap(bo).intoMemorySegment0(ms, offset, m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4656,7 +4779,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -4673,7 +4796,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, arrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -4750,7 +4873,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, charArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> (short) arr_[off_ + i]));
|
||||
}
|
||||
|
||||
@ -4767,7 +4890,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, charArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> (short) arr_[off_ + i]));
|
||||
}
|
||||
#end[short]
|
||||
@ -4784,7 +4907,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, booleanArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> s.ldOp(arr, off,
|
||||
(arr, off, s) -> s.ldOp(arr, (int) off,
|
||||
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
|
||||
}
|
||||
|
||||
@ -4801,79 +4924,38 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, booleanArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, off, vm,
|
||||
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
|
||||
(arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
|
||||
}
|
||||
#end[byte]
|
||||
|
||||
@Override
|
||||
abstract
|
||||
$abstractvectortype$ fromByteArray0(byte[] a, int offset);
|
||||
$abstractvectortype$ fromMemorySegment0(MemorySegment bb, long offset);
|
||||
@ForceInline
|
||||
final
|
||||
$abstractvectortype$ fromByteArray0Template(byte[] a, int offset) {
|
||||
$abstractvectortype$ fromMemorySegment0Template(MemorySegment ms, long offset) {
|
||||
$Type$Species vsp = vspecies();
|
||||
return VectorSupport.load(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
a, offset, vsp,
|
||||
(arr, off, s) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
$abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<$Boxtype$>>
|
||||
$abstractvectortype$ fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
$Type$Species vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return VectorSupport.loadMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset), m,
|
||||
a, offset, vsp,
|
||||
(arr, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
$abstractvectortype$ fromByteBuffer0Template(ByteBuffer bb, int offset) {
|
||||
$Type$Species vsp = vspecies();
|
||||
return ScopedMemoryAccess.loadFromByteBuffer(
|
||||
return ScopedMemoryAccess.loadFromMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, vsp,
|
||||
(buf, off, s) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
(AbstractMemorySegmentImpl) ms, offset, vsp,
|
||||
(msp, off, s) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, $abstractvectortype$::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m);
|
||||
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<$Boxtype$>>
|
||||
$abstractvectortype$ fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
$abstractvectortype$ fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
$Type$Species vsp = vspecies();
|
||||
m.check(vsp);
|
||||
return ScopedMemoryAccess.loadFromByteBufferMasked(
|
||||
return ScopedMemoryAccess.loadFromMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
bb, offset, m, vsp,
|
||||
(buf, off, s, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
return s.ldOp(wb, off, vm,
|
||||
(wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
|
||||
(AbstractMemorySegmentImpl) ms, offset, m, vsp,
|
||||
(msp, off, s, vm) -> {
|
||||
return s.ldLongOp((MemorySegment) msp, off, vm, $abstractvectortype$::memorySegmentGet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -4892,7 +4974,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v)
|
||||
-> v.stOp(arr, off,
|
||||
-> v.stOp(arr, (int) off,
|
||||
(arr_, off_, i, e) -> arr_[off_+i] = e));
|
||||
}
|
||||
|
||||
@ -4909,7 +4991,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
a, arrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = e));
|
||||
}
|
||||
|
||||
@ -4990,76 +5072,38 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
a, booleanArrayAddress(a, offset),
|
||||
normalized, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (e & 1) != 0));
|
||||
}
|
||||
#end[byte]
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset);
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteArray0Template(byte[] a, int offset) {
|
||||
void intoMemorySegment0(MemorySegment ms, long offset) {
|
||||
$Type$Species vsp = vspecies();
|
||||
VectorSupport.store(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, a, offset,
|
||||
(arr, off, v) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<$Boxtype$>>
|
||||
void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
|
||||
$Type$Species vsp = vspecies();
|
||||
m.check(vsp);
|
||||
VectorSupport.storeMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
a, byteArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset) {
|
||||
$Type$Species vsp = vspecies();
|
||||
ScopedMemoryAccess.storeIntoByteBuffer(
|
||||
ScopedMemoryAccess.storeIntoMemorySegment(
|
||||
vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
|
||||
this, bb, offset,
|
||||
(buf, off, v) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off,
|
||||
(wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
|
||||
this,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, $abstractvectortype$::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
abstract
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m);
|
||||
void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<$Boxtype$> m);
|
||||
@ForceInline
|
||||
final
|
||||
<M extends VectorMask<$Boxtype$>>
|
||||
void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
|
||||
void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
|
||||
$Type$Species vsp = vspecies();
|
||||
m.check(vsp);
|
||||
ScopedMemoryAccess.storeIntoByteBufferMasked(
|
||||
ScopedMemoryAccess.storeIntoMemorySegmentMasked(
|
||||
vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
|
||||
this, m, bb, offset,
|
||||
(buf, off, v, vm) -> {
|
||||
ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
|
||||
v.stOp(wb, off, vm,
|
||||
(wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
|
||||
this, m,
|
||||
(AbstractMemorySegmentImpl) ms, offset,
|
||||
(msp, off, v, vm) -> {
|
||||
v.stLongOp((MemorySegment) msp, off, vm, $abstractvectortype$::memorySegmentSet);
|
||||
});
|
||||
}
|
||||
|
||||
@ -5078,7 +5122,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
a, charArrayAddress(a, offset),
|
||||
this, m, a, offset,
|
||||
(arr, off, v, vm)
|
||||
-> v.stOp(arr, off, vm,
|
||||
-> v.stOp(arr, (int) off, vm,
|
||||
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
|
||||
}
|
||||
#end[short]
|
||||
@ -5095,6 +5139,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
private static
|
||||
void checkMaskFromIndexSize(long offset,
|
||||
$Type$Species vsp,
|
||||
VectorMask<$Boxtype$> m,
|
||||
int scale,
|
||||
long limit) {
|
||||
((AbstractMask<$Boxtype$>)m)
|
||||
.checkIndexByLane(offset, limit, vsp.iota(), scale);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
private void conditionalStoreNYI(int offset,
|
||||
$Type$Species vsp,
|
||||
@ -5463,6 +5517,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
return dummyVector().ldOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
$abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
|
||||
VectorMask<$Boxtype$> m,
|
||||
FLdLongOp f) {
|
||||
return dummyVector().ldLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
<M> void stOp(M memory, int offset, FStOp<M> f) {
|
||||
@ -5477,6 +5546,20 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
dummyVector().stOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, f);
|
||||
}
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
void stLongOp(MemorySegment memory, long offset,
|
||||
AbstractMask<$Boxtype$> m,
|
||||
FStLongOp f) {
|
||||
dummyVector().stLongOp(memory, offset, m, f);
|
||||
}
|
||||
|
||||
// N.B. Make sure these constant vectors and
|
||||
// masks load up correctly into registers.
|
||||
//
|
||||
@ -5590,3 +5673,4 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
|
||||
public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
|
||||
= ($Type$Species) VectorSpecies.ofPreferred($type$.class);
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
@ -480,6 +480,22 @@ final class $vectortype$ extends $abstractvectortype$ {
|
||||
($vectortype$) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public $vectortype$ compress(VectorMask<$Boxtype$> m) {
|
||||
return ($vectortype$)
|
||||
super.compressTemplate($masktype$.class,
|
||||
($masktype$) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public $vectortype$ expand(VectorMask<$Boxtype$> m) {
|
||||
return ($vectortype$)
|
||||
super.expandTemplate($masktype$.class,
|
||||
($masktype$) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public $vectortype$ selectFrom(Vector<$Boxtype$> v) {
|
||||
@ -920,6 +936,15 @@ final class $vectortype$ extends $abstractvectortype$ {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public $masktype$ compress() {
|
||||
return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
|
||||
$vectortype$.class, $masktype$.class, ETYPE, VLENGTH, null, this,
|
||||
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
|
||||
}
|
||||
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ -1159,29 +1184,15 @@ final class $vectortype$ extends $abstractvectortype$ {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
$abstractvectortype$ fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset) {
|
||||
return super.fromMemorySegment0Template(ms, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
$abstractvectortype$ fromByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
|
||||
return super.fromByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
$abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
|
||||
return super.fromByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m); // specialize
|
||||
$abstractvectortype$ fromMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m) {
|
||||
return super.fromMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@ -1219,22 +1230,8 @@ final class $vectortype$ extends $abstractvectortype$ {
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset, VectorMask<$Boxtype$> m) {
|
||||
super.intoByteArray0Template($masktype$.class, a, offset, ($masktype$) m); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<$Boxtype$> m) {
|
||||
super.intoByteBuffer0Template($masktype$.class, bb, offset, ($masktype$) m);
|
||||
void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<$Boxtype$> m) {
|
||||
super.intoMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m);
|
||||
}
|
||||
|
||||
#if[short]
|
||||
@ -1251,3 +1248,4 @@ final class $vectortype$ extends $abstractvectortype$ {
|
||||
// ================================================
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -224,6 +224,8 @@ public class AMD64 extends Architecture {
|
||||
RDTSCP,
|
||||
RDPID,
|
||||
FSRM,
|
||||
GFNI,
|
||||
AVX512_BITALG,
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
|
||||
@ -1769,6 +1769,10 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["lasta", "__ sve_lasta(v0, __ B, p0, z15);", "lasta\tb0, p0, z15.b"],
|
||||
["lastb", "__ sve_lastb(v1, __ B, p1, z16);", "lastb\tb1, p1, z16.b"],
|
||||
["index", "__ sve_index(z6, __ S, 1, 1);", "index\tz6.s, #1, #1"],
|
||||
["index", "__ sve_index(z6, __ B, r5, 2);", "index\tz6.b, w5, #2"],
|
||||
["index", "__ sve_index(z6, __ H, r5, 3);", "index\tz6.h, w5, #3"],
|
||||
["index", "__ sve_index(z6, __ S, r5, 4);", "index\tz6.s, w5, #4"],
|
||||
["index", "__ sve_index(z7, __ D, r5, 5);", "index\tz7.d, x5, #5"],
|
||||
["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
|
||||
["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
|
||||
["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
|
||||
@ -1811,7 +1815,12 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["uzp2", "__ sve_uzp2(p0, __ D, p0, p1);", "uzp2\tp0.d, p0.d, p1.d"],
|
||||
["punpklo", "__ sve_punpklo(p1, p0);", "punpklo\tp1.h, p0.b"],
|
||||
["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
|
||||
["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"],
|
||||
["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"],
|
||||
["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
|
||||
# SVE2 instructions
|
||||
["histcnt", "__ sve_histcnt(z16, __ S, p0, z16, z16);", "histcnt\tz16.s, p0/z, z16.s, z16.s"],
|
||||
["histcnt", "__ sve_histcnt(z17, __ D, p0, z17, z17);", "histcnt\tz17.d, p0/z, z17.d, z17.d"],
|
||||
])
|
||||
|
||||
print "\n// FloatImmediateOp"
|
||||
@ -1855,6 +1864,7 @@ generate(SVEVectorOp, [["add", "ZZZ"],
|
||||
["and", "ZPZ", "m", "dn"],
|
||||
["asr", "ZPZ", "m", "dn"],
|
||||
["bic", "ZPZ", "m", "dn"],
|
||||
["clz", "ZPZ", "m"],
|
||||
["cnt", "ZPZ", "m"],
|
||||
["eor", "ZPZ", "m", "dn"],
|
||||
["lsl", "ZPZ", "m", "dn"],
|
||||
@ -1863,6 +1873,8 @@ generate(SVEVectorOp, [["add", "ZZZ"],
|
||||
["neg", "ZPZ", "m"],
|
||||
["not", "ZPZ", "m"],
|
||||
["orr", "ZPZ", "m", "dn"],
|
||||
["rbit", "ZPZ", "m"],
|
||||
["revb", "ZPZ", "m"],
|
||||
["smax", "ZPZ", "m", "dn"],
|
||||
["smin", "ZPZ", "m", "dn"],
|
||||
["sub", "ZPZ", "m", "dn"],
|
||||
|
||||
@ -913,6 +913,10 @@
|
||||
__ sve_lasta(v0, __ B, p0, z15); // lasta b0, p0, z15.b
|
||||
__ sve_lastb(v1, __ B, p1, z16); // lastb b1, p1, z16.b
|
||||
__ sve_index(z6, __ S, 1, 1); // index z6.s, #1, #1
|
||||
__ sve_index(z6, __ B, r5, 2); // index z6.b, w5, #2
|
||||
__ sve_index(z6, __ H, r5, 3); // index z6.h, w5, #3
|
||||
__ sve_index(z6, __ S, r5, 4); // index z6.s, w5, #4
|
||||
__ sve_index(z7, __ D, r5, 5); // index z7.d, x5, #5
|
||||
__ sve_cpy(z7, __ H, p3, r5); // cpy z7.h, p3/m, w5
|
||||
__ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s
|
||||
__ sve_ld1w_gather(z15, p0, r5, z16); // ld1w {z15.s}, p0/z, [x5, z16.s, uxtw #2]
|
||||
@ -955,7 +959,11 @@
|
||||
__ sve_uzp2(p0, __ D, p0, p1); // uzp2 p0.d, p0.d, p1.d
|
||||
__ sve_punpklo(p1, p0); // punpklo p1.h, p0.b
|
||||
__ sve_punpkhi(p1, p0); // punpkhi p1.h, p0.b
|
||||
__ sve_compact(z16, __ S, z16, p1); // compact z16.s, p1, z16.s
|
||||
__ sve_compact(z16, __ D, z16, p1); // compact z16.d, p1, z16.d
|
||||
__ sve_ext(z17, z16, 63); // ext z17.b, z17.b, z16.b, #63
|
||||
__ sve_histcnt(z16, __ S, p0, z16, z16); // histcnt z16.s, p0/z, z16.s, z16.s
|
||||
__ sve_histcnt(z17, __ D, p0, z17, z17); // histcnt z17.d, p0/z, z17.d, z17.d
|
||||
|
||||
// FloatImmediateOp
|
||||
__ fmovd(v0, 2.0); // fmov d0, #2.0
|
||||
@ -1144,57 +1152,60 @@
|
||||
__ sve_and(z22, __ D, p5, z20); // and z22.d, p5/m, z22.d, z20.d
|
||||
__ sve_asr(z28, __ S, p2, z13); // asr z28.s, p2/m, z28.s, z13.s
|
||||
__ sve_bic(z7, __ H, p5, z28); // bic z7.h, p5/m, z7.h, z28.h
|
||||
__ sve_cnt(z11, __ S, p3, z11); // cnt z11.s, p3/m, z11.s
|
||||
__ sve_eor(z1, __ S, p6, z8); // eor z1.s, p6/m, z1.s, z8.s
|
||||
__ sve_lsl(z13, __ S, p4, z17); // lsl z13.s, p4/m, z13.s, z17.s
|
||||
__ sve_lsr(z4, __ H, p0, z3); // lsr z4.h, p0/m, z4.h, z3.h
|
||||
__ sve_mul(z7, __ S, p3, z14); // mul z7.s, p3/m, z7.s, z14.s
|
||||
__ sve_neg(z4, __ B, p3, z29); // neg z4.b, p3/m, z29.b
|
||||
__ sve_not(z0, __ D, p2, z21); // not z0.d, p2/m, z21.d
|
||||
__ sve_orr(z3, __ S, p0, z9); // orr z3.s, p0/m, z3.s, z9.s
|
||||
__ sve_smax(z28, __ B, p2, z24); // smax z28.b, p2/m, z28.b, z24.b
|
||||
__ sve_smin(z19, __ D, p1, z23); // smin z19.d, p1/m, z19.d, z23.d
|
||||
__ sve_sub(z13, __ D, p5, z10); // sub z13.d, p5/m, z13.d, z10.d
|
||||
__ sve_fabs(z12, __ D, p4, z30); // fabs z12.d, p4/m, z30.d
|
||||
__ sve_fadd(z14, __ D, p0, z29); // fadd z14.d, p0/m, z14.d, z29.d
|
||||
__ sve_fdiv(z21, __ D, p5, z7); // fdiv z21.d, p5/m, z21.d, z7.d
|
||||
__ sve_fmax(z2, __ D, p0, z26); // fmax z2.d, p0/m, z2.d, z26.d
|
||||
__ sve_fmin(z9, __ D, p4, z17); // fmin z9.d, p4/m, z9.d, z17.d
|
||||
__ sve_fmul(z0, __ D, p1, z2); // fmul z0.d, p1/m, z0.d, z2.d
|
||||
__ sve_fneg(z14, __ D, p1, z11); // fneg z14.d, p1/m, z11.d
|
||||
__ sve_frintm(z14, __ S, p4, z29); // frintm z14.s, p4/m, z29.s
|
||||
__ sve_frintn(z3, __ S, p0, z22); // frintn z3.s, p0/m, z22.s
|
||||
__ sve_frintp(z3, __ S, p6, z27); // frintp z3.s, p6/m, z27.s
|
||||
__ sve_fsqrt(z19, __ D, p5, z7); // fsqrt z19.d, p5/m, z7.d
|
||||
__ sve_fsub(z21, __ S, p3, z5); // fsub z21.s, p3/m, z21.s, z5.s
|
||||
__ sve_fmad(z25, __ D, p1, z21, z17); // fmad z25.d, p1/m, z21.d, z17.d
|
||||
__ sve_fmla(z0, __ S, p0, z9, z19); // fmla z0.s, p0/m, z9.s, z19.s
|
||||
__ sve_fmls(z7, __ D, p3, z14, z17); // fmls z7.d, p3/m, z14.d, z17.d
|
||||
__ sve_fmsb(z11, __ D, p3, z24, z17); // fmsb z11.d, p3/m, z24.d, z17.d
|
||||
__ sve_fnmad(z17, __ D, p2, z15, z14); // fnmad z17.d, p2/m, z15.d, z14.d
|
||||
__ sve_fnmsb(z22, __ S, p7, z22, z7); // fnmsb z22.s, p7/m, z22.s, z7.s
|
||||
__ sve_fnmla(z5, __ S, p7, z27, z10); // fnmla z5.s, p7/m, z27.s, z10.s
|
||||
__ sve_fnmls(z14, __ S, p6, z21, z20); // fnmls z14.s, p6/m, z21.s, z20.s
|
||||
__ sve_mla(z3, __ D, p5, z25, z5); // mla z3.d, p5/m, z25.d, z5.d
|
||||
__ sve_mls(z29, __ H, p4, z17, z1); // mls z29.h, p4/m, z17.h, z1.h
|
||||
__ sve_and(z14, z29, z13); // and z14.d, z29.d, z13.d
|
||||
__ sve_eor(z17, z2, z30); // eor z17.d, z2.d, z30.d
|
||||
__ sve_orr(z22, z21, z29); // orr z22.d, z21.d, z29.d
|
||||
__ sve_bic(z8, z2, z0); // bic z8.d, z2.d, z0.d
|
||||
__ sve_uzp1(z23, __ S, z22, z0); // uzp1 z23.s, z22.s, z0.s
|
||||
__ sve_uzp2(z25, __ H, z26, z23); // uzp2 z25.h, z26.h, z23.h
|
||||
__ sve_bext(z21, __ B, z21, z1); // bext z21.b, z21.b, z1.b
|
||||
__ sve_clz(z11, __ S, p3, z11); // clz z11.s, p3/m, z11.s
|
||||
__ sve_cnt(z1, __ S, p6, z8); // cnt z1.s, p6/m, z8.s
|
||||
__ sve_eor(z13, __ S, p4, z17); // eor z13.s, p4/m, z13.s, z17.s
|
||||
__ sve_lsl(z4, __ H, p0, z3); // lsl z4.h, p0/m, z4.h, z3.h
|
||||
__ sve_lsr(z7, __ S, p3, z14); // lsr z7.s, p3/m, z7.s, z14.s
|
||||
__ sve_mul(z4, __ B, p3, z29); // mul z4.b, p3/m, z4.b, z29.b
|
||||
__ sve_neg(z0, __ D, p2, z21); // neg z0.d, p2/m, z21.d
|
||||
__ sve_not(z3, __ S, p0, z9); // not z3.s, p0/m, z9.s
|
||||
__ sve_orr(z28, __ B, p2, z24); // orr z28.b, p2/m, z28.b, z24.b
|
||||
__ sve_rbit(z19, __ D, p1, z23); // rbit z19.d, p1/m, z23.d
|
||||
__ sve_revb(z13, __ D, p5, z10); // revb z13.d, p5/m, z10.d
|
||||
__ sve_smax(z12, __ S, p4, z30); // smax z12.s, p4/m, z12.s, z30.s
|
||||
__ sve_smin(z14, __ S, p0, z29); // smin z14.s, p0/m, z14.s, z29.s
|
||||
__ sve_sub(z21, __ S, p5, z7); // sub z21.s, p5/m, z21.s, z7.s
|
||||
__ sve_fabs(z2, __ D, p0, z26); // fabs z2.d, p0/m, z26.d
|
||||
__ sve_fadd(z9, __ D, p4, z17); // fadd z9.d, p4/m, z9.d, z17.d
|
||||
__ sve_fdiv(z0, __ D, p1, z2); // fdiv z0.d, p1/m, z0.d, z2.d
|
||||
__ sve_fmax(z14, __ D, p1, z11); // fmax z14.d, p1/m, z14.d, z11.d
|
||||
__ sve_fmin(z14, __ S, p4, z29); // fmin z14.s, p4/m, z14.s, z29.s
|
||||
__ sve_fmul(z3, __ S, p0, z22); // fmul z3.s, p0/m, z3.s, z22.s
|
||||
__ sve_fneg(z3, __ S, p6, z27); // fneg z3.s, p6/m, z27.s
|
||||
__ sve_frintm(z19, __ D, p5, z7); // frintm z19.d, p5/m, z7.d
|
||||
__ sve_frintn(z21, __ S, p3, z5); // frintn z21.s, p3/m, z5.s
|
||||
__ sve_frintp(z25, __ D, p1, z21); // frintp z25.d, p1/m, z21.d
|
||||
__ sve_fsqrt(z17, __ S, p0, z3); // fsqrt z17.s, p0/m, z3.s
|
||||
__ sve_fsub(z19, __ S, p3, z7); // fsub z19.s, p3/m, z19.s, z7.s
|
||||
__ sve_fmad(z14, __ S, p4, z17, z11); // fmad z14.s, p4/m, z17.s, z11.s
|
||||
__ sve_fmla(z24, __ S, p4, z30, z17); // fmla z24.s, p4/m, z30.s, z17.s
|
||||
__ sve_fmls(z15, __ D, p3, z26, z22); // fmls z15.d, p3/m, z26.d, z22.d
|
||||
__ sve_fmsb(z22, __ D, p2, z8, z5); // fmsb z22.d, p2/m, z8.d, z5.d
|
||||
__ sve_fnmad(z27, __ D, p2, z0, z14); // fnmad z27.d, p2/m, z0.d, z14.d
|
||||
__ sve_fnmsb(z21, __ D, p5, z0, z3); // fnmsb z21.d, p5/m, z0.d, z3.d
|
||||
__ sve_fnmla(z25, __ D, p1, z25, z29); // fnmla z25.d, p1/m, z25.d, z29.d
|
||||
__ sve_fnmls(z17, __ D, p0, z12, z14); // fnmls z17.d, p0/m, z12.d, z14.d
|
||||
__ sve_mla(z13, __ D, p0, z17, z2); // mla z13.d, p0/m, z17.d, z2.d
|
||||
__ sve_mls(z20, __ H, p5, z21, z29); // mls z20.h, p5/m, z21.h, z29.h
|
||||
__ sve_and(z8, z2, z0); // and z8.d, z2.d, z0.d
|
||||
__ sve_eor(z23, z22, z0); // eor z23.d, z22.d, z0.d
|
||||
__ sve_orr(z25, z26, z23); // orr z25.d, z26.d, z23.d
|
||||
__ sve_bic(z21, z21, z1); // bic z21.d, z21.d, z1.d
|
||||
__ sve_uzp1(z10, __ S, z19, z11); // uzp1 z10.s, z19.s, z11.s
|
||||
__ sve_uzp2(z23, __ D, z23, z8); // uzp2 z23.d, z23.d, z8.d
|
||||
__ sve_bext(z17, __ S, z19, z19); // bext z17.s, z19.s, z19.s
|
||||
|
||||
// SVEReductionOp
|
||||
__ sve_andv(v10, __ S, p5, z11); // andv s10, p5, z11.s
|
||||
__ sve_orv(v23, __ D, p6, z8); // orv d23, p6, z8.d
|
||||
__ sve_eorv(v17, __ S, p5, z19); // eorv s17, p5, z19.s
|
||||
__ sve_smaxv(v4, __ D, p5, z13); // smaxv d4, p5, z13.d
|
||||
__ sve_sminv(v22, __ D, p7, z30); // sminv d22, p7, z30.d
|
||||
__ sve_fminv(v17, __ S, p4, z14); // fminv s17, p4, z14.s
|
||||
__ sve_fmaxv(v12, __ S, p7, z20); // fmaxv s12, p7, z20.s
|
||||
__ sve_fadda(v1, __ S, p3, z13); // fadda s1, p3, s1, z13.s
|
||||
__ sve_uaddv(v7, __ S, p2, z11); // uaddv d7, p2, z11.s
|
||||
__ sve_andv(v4, __ D, p5, z13); // andv d4, p5, z13.d
|
||||
__ sve_orv(v22, __ D, p7, z30); // orv d22, p7, z30.d
|
||||
__ sve_eorv(v17, __ H, p4, z14); // eorv h17, p4, z14.h
|
||||
__ sve_smaxv(v12, __ B, p7, z20); // smaxv b12, p7, z20.b
|
||||
__ sve_sminv(v1, __ B, p3, z13); // sminv b1, p3, z13.b
|
||||
__ sve_fminv(v7, __ D, p2, z11); // fminv d7, p2, z11.d
|
||||
__ sve_fmaxv(v4, __ S, p6, z15); // fmaxv s4, p6, z15.s
|
||||
__ sve_fadda(v3, __ D, p7, z0); // fadda d3, p7, d3, z0.d
|
||||
__ sve_uaddv(v5, __ D, p5, z30); // uaddv d5, p5, z30.d
|
||||
|
||||
__ bind(forth);
|
||||
|
||||
@ -1213,30 +1224,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140003e5, 0x94000000,
|
||||
0x97ffffd4, 0x940003e2, 0x3400000a, 0x34fffa2a,
|
||||
0x34007bea, 0x35000008, 0x35fff9c8, 0x35007b88,
|
||||
0xb400000b, 0xb4fff96b, 0xb4007b2b, 0xb500001d,
|
||||
0xb5fff91d, 0xb5007add, 0x10000013, 0x10fff8b3,
|
||||
0x10007a73, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363079f6, 0x3758000c, 0x375ff7cc, 0x3758798c,
|
||||
0x14000000, 0x17ffffd7, 0x140003f0, 0x94000000,
|
||||
0x97ffffd4, 0x940003ed, 0x3400000a, 0x34fffa2a,
|
||||
0x34007d4a, 0x35000008, 0x35fff9c8, 0x35007ce8,
|
||||
0xb400000b, 0xb4fff96b, 0xb4007c8b, 0xb500001d,
|
||||
0xb5fff91d, 0xb5007c3d, 0x10000013, 0x10fff8b3,
|
||||
0x10007bd3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36307b56, 0x3758000c, 0x375ff7cc, 0x37587aec,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54007760, 0x54000001, 0x54fff541, 0x54007701,
|
||||
0x54000002, 0x54fff4e2, 0x540076a2, 0x54000002,
|
||||
0x54fff482, 0x54007642, 0x54000003, 0x54fff423,
|
||||
0x540075e3, 0x54000003, 0x54fff3c3, 0x54007583,
|
||||
0x54000004, 0x54fff364, 0x54007524, 0x54000005,
|
||||
0x54fff305, 0x540074c5, 0x54000006, 0x54fff2a6,
|
||||
0x54007466, 0x54000007, 0x54fff247, 0x54007407,
|
||||
0x54000008, 0x54fff1e8, 0x540073a8, 0x54000009,
|
||||
0x54fff189, 0x54007349, 0x5400000a, 0x54fff12a,
|
||||
0x540072ea, 0x5400000b, 0x54fff0cb, 0x5400728b,
|
||||
0x5400000c, 0x54fff06c, 0x5400722c, 0x5400000d,
|
||||
0x54fff00d, 0x540071cd, 0x5400000e, 0x54ffefae,
|
||||
0x5400716e, 0x5400000f, 0x54ffef4f, 0x5400710f,
|
||||
0x540078c0, 0x54000001, 0x54fff541, 0x54007861,
|
||||
0x54000002, 0x54fff4e2, 0x54007802, 0x54000002,
|
||||
0x54fff482, 0x540077a2, 0x54000003, 0x54fff423,
|
||||
0x54007743, 0x54000003, 0x54fff3c3, 0x540076e3,
|
||||
0x54000004, 0x54fff364, 0x54007684, 0x54000005,
|
||||
0x54fff305, 0x54007625, 0x54000006, 0x54fff2a6,
|
||||
0x540075c6, 0x54000007, 0x54fff247, 0x54007567,
|
||||
0x54000008, 0x54fff1e8, 0x54007508, 0x54000009,
|
||||
0x54fff189, 0x540074a9, 0x5400000a, 0x54fff12a,
|
||||
0x5400744a, 0x5400000b, 0x54fff0cb, 0x540073eb,
|
||||
0x5400000c, 0x54fff06c, 0x5400738c, 0x5400000d,
|
||||
0x54fff00d, 0x5400732d, 0x5400000e, 0x54ffefae,
|
||||
0x540072ce, 0x5400000f, 0x54ffef4f, 0x5400726f,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1401,7 +1412,8 @@
|
||||
0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85,
|
||||
0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801,
|
||||
0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601,
|
||||
0x052281e0, 0x05238601, 0x04a14026, 0x0568aca7,
|
||||
0x052281e0, 0x05238601, 0x04a14026, 0x042244a6,
|
||||
0x046344a6, 0x04a444a6, 0x04e544a7, 0x0568aca7,
|
||||
0x05b23230, 0x853040af, 0xc5b040af, 0xe57080af,
|
||||
0xe5b080af, 0x25034440, 0x254054c4, 0x25034640,
|
||||
0x25415a05, 0x25834440, 0x25c54489, 0x250b5d3a,
|
||||
@ -1412,7 +1424,8 @@
|
||||
0x25d8e104, 0x25d8e184, 0x2518e407, 0x05214800,
|
||||
0x05614800, 0x05a14800, 0x05e14800, 0x05214c00,
|
||||
0x05614c00, 0x05a14c00, 0x05e14c00, 0x05304001,
|
||||
0x05314001, 0x05271e11, 0x1e601000, 0x1e603000,
|
||||
0x05314001, 0x05a18610, 0x05e18610, 0x05271e11,
|
||||
0x45b0c210, 0x45f1c231, 0x1e601000, 0x1e603000,
|
||||
0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000,
|
||||
0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000,
|
||||
0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000,
|
||||
@ -1450,18 +1463,19 @@
|
||||
0x25a1de96, 0x05808874, 0x05423bb1, 0x050030e4,
|
||||
0x04680102, 0x04be0638, 0x658103c4, 0x65800993,
|
||||
0x65910707, 0x04d6a53b, 0x04c00e17, 0x04da1696,
|
||||
0x049089bc, 0x045b1787, 0x049aad6b, 0x04991901,
|
||||
0x0493922d, 0x04518064, 0x04900dc7, 0x0417afa4,
|
||||
0x04deaaa0, 0x04980123, 0x04080b1c, 0x04ca06f3,
|
||||
0x04c1154d, 0x04dcb3cc, 0x65c083ae, 0x65cd94f5,
|
||||
0x65c68342, 0x65c79229, 0x65c28440, 0x04dda56e,
|
||||
0x6582b3ae, 0x6580a2c3, 0x6581bb63, 0x65cdb4f3,
|
||||
0x65818cb5, 0x65f186b9, 0x65b30120, 0x65f12dc7,
|
||||
0x65f1af0b, 0x65eec9f1, 0x65a7fed6, 0x65aa5f65,
|
||||
0x65b47aae, 0x04c55723, 0x0441723d, 0x042d33ae,
|
||||
0x04be3051, 0x047d32b6, 0x04e03048, 0x05a06ad7,
|
||||
0x05776f59, 0x4501b2b5, 0x049a356a, 0x04d83917,
|
||||
0x04993671, 0x04c835a4, 0x04ca3fd6, 0x658731d1,
|
||||
0x65863e8c, 0x65982da1, 0x04812967,
|
||||
0x049089bc, 0x045b1787, 0x0499ad6b, 0x049ab901,
|
||||
0x0499122d, 0x04538064, 0x04918dc7, 0x04100fa4,
|
||||
0x04d7aaa0, 0x049ea123, 0x04180b1c, 0x05e786f3,
|
||||
0x05e4954d, 0x048813cc, 0x048a03ae, 0x048114f5,
|
||||
0x04dca342, 0x65c09229, 0x65cd8440, 0x65c6856e,
|
||||
0x658793ae, 0x658282c3, 0x049dbb63, 0x65c2b4f3,
|
||||
0x6580acb5, 0x65c1a6b9, 0x658da071, 0x65818cf3,
|
||||
0x65ab922e, 0x65b113d8, 0x65f62f4f, 0x65e5a916,
|
||||
0x65eec81b, 0x65e3f415, 0x65fd4739, 0x65ee6191,
|
||||
0x04c2422d, 0x045d76b4, 0x04203048, 0x04a032d7,
|
||||
0x04773359, 0x04e132b5, 0x05ab6a6a, 0x05e86ef7,
|
||||
0x4593b271, 0x04da35a4, 0x04d83fd6, 0x045931d1,
|
||||
0x04083e8c, 0x040a2da1, 0x65c72967, 0x658639e4,
|
||||
0x65d83c03, 0x04c137c5,
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
* Copyright (C) 2021, 2022, THL A29 Limited, a Tencent company. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,11 +23,13 @@
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import jdk.incubator.vector.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @enablePreview
|
||||
* @bug 8262998
|
||||
* @summary Vector API intrinsincs should not modify IR when bailing out
|
||||
* @modules jdk.incubator.vector
|
||||
@ -40,13 +42,15 @@ public class TestIntrinsicBailOut {
|
||||
static final VectorSpecies<Double> SPECIES256 = DoubleVector.SPECIES_256;
|
||||
static byte[] a = new byte[512];
|
||||
static byte[] r = new byte[512];
|
||||
static MemorySegment msa = MemorySegment.ofArray(a);
|
||||
static MemorySegment msr = MemorySegment.ofArray(r);
|
||||
|
||||
static void test() {
|
||||
DoubleVector av = DoubleVector.fromByteArray(SPECIES256, a, 0, ByteOrder.BIG_ENDIAN);
|
||||
av.intoByteArray(r, 0, ByteOrder.BIG_ENDIAN);
|
||||
DoubleVector av = DoubleVector.fromMemorySegment(SPECIES256, msa, 0, ByteOrder.BIG_ENDIAN);
|
||||
av.intoMemorySegment(msr, 0, ByteOrder.BIG_ENDIAN);
|
||||
|
||||
DoubleVector bv = DoubleVector.fromByteArray(SPECIES256, a, 32, ByteOrder.LITTLE_ENDIAN);
|
||||
bv.intoByteArray(r, 32, ByteOrder.LITTLE_ENDIAN);
|
||||
DoubleVector bv = DoubleVector.fromMemorySegment(SPECIES256, msa, 32, ByteOrder.LITTLE_ENDIAN);
|
||||
bv.intoMemorySegment(msr, 32, ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@ -38,42 +38,42 @@ public class TestVectorErgonomics {
|
||||
|
||||
public static void main(String[] args) throws Throwable {
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorReboxing=true");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorAggressiveReboxing=true");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:-EnableVectorReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:-EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorReboxing=false")
|
||||
.shouldContain("EnableVectorAggressiveReboxing=false");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:-EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:-EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorAggressiveReboxing=false");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:-EnableVectorSupport", "-Xlog:compilation", "-version")
|
||||
"-XX:-EnableVectorSupport", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorSupport=false")
|
||||
.shouldContain("EnableVectorReboxing=false")
|
||||
.shouldContain("EnableVectorAggressiveReboxing=false");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:-EnableVectorSupport", "-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:-EnableVectorSupport", "-XX:+EnableVectorReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorSupport=false")
|
||||
.shouldContain("EnableVectorReboxing=false")
|
||||
.shouldContain("EnableVectorAggressiveReboxing=false");
|
||||
|
||||
ProcessTools.executeTestJvm("--add-modules=jdk.incubator.vector", "-XX:+UnlockExperimentalVMOptions",
|
||||
"-XX:-EnableVectorSupport", "-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version")
|
||||
"-XX:-EnableVectorSupport", "-XX:+EnableVectorAggressiveReboxing", "-Xlog:compilation", "-version", "--enable-preview")
|
||||
.shouldHaveExitValue(0)
|
||||
.shouldContain("EnableVectorSupport=false")
|
||||
.shouldContain("EnableVectorReboxing=false")
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, Rado Smogura. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
@ -26,6 +26,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @enablePreview
|
||||
* @summary Test if memory ordering is preserved
|
||||
*
|
||||
* @run main/othervm -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+AbortVMOnCompilationFailure
|
||||
@ -36,8 +37,8 @@
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
@ -53,13 +54,13 @@ public class VectorMemoryAlias {
|
||||
|
||||
public static int test() {
|
||||
byte arr[] = new byte[256];
|
||||
final var bb = ByteBuffer.wrap(arr);
|
||||
final var ms = MemorySegment.ofArray(arr);
|
||||
final var ones = ByteVector.broadcast(SPECIES, 1);
|
||||
var res = ByteVector.zero(SPECIES);
|
||||
|
||||
int result = 0;
|
||||
result += arr[2];
|
||||
res.add(ones).intoByteBuffer(bb, 0, ByteOrder.nativeOrder());
|
||||
res.add(ones).intoMemorySegment(ms, 0L, ByteOrder.nativeOrder());
|
||||
result += arr[2];
|
||||
|
||||
return result;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -20,30 +20,28 @@
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.VarHandle;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.function.IntFunction;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
import jdk.incubator.vector.VectorShape;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
/*
|
||||
* @test id=Z
|
||||
* @bug 8260473
|
||||
* @enablePreview
|
||||
* @requires vm.gc.Z
|
||||
* @modules jdk.incubator.vector
|
||||
* @modules java.base/jdk.internal.vm.annotation
|
||||
* @run testng/othervm -XX:CompileCommand=compileonly,jdk/incubator/vector/ByteVector.fromByteBuffer
|
||||
* @run testng/othervm -XX:CompileCommand=compileonly,jdk/incubator/vector/ByteVector.fromMemorySegment
|
||||
* -XX:-TieredCompilation -XX:CICompilerCount=1 -XX:+UseZGC -Xbatch -Xmx256m VectorRebracket128Test
|
||||
*/
|
||||
|
||||
@ -124,8 +122,10 @@ public class VectorRebracket128Test {
|
||||
|
||||
@ForceInline
|
||||
static <E,F>
|
||||
void testVectorRebracket(VectorSpecies<E> a, VectorSpecies<F> b, byte[] input, byte[] output) {
|
||||
Vector<E> av = a.fromByteArray(input, 0, ByteOrder.nativeOrder());
|
||||
void testVectorRebracket(VectorSpecies<E> a, VectorSpecies<F> b,
|
||||
byte[] input, byte[] output,
|
||||
MemorySegment msInput, MemorySegment msOutput) {
|
||||
Vector<E> av = a.fromMemorySegment(msInput, 0, ByteOrder.nativeOrder());
|
||||
int block;
|
||||
assert(input.length == output.length);
|
||||
|
||||
@ -139,7 +139,7 @@ public class VectorRebracket128Test {
|
||||
|
||||
int part = 0;
|
||||
Vector<F> bv = av.reinterpretShape(b, part);
|
||||
bv.intoByteArray(output, 0, ByteOrder.nativeOrder());
|
||||
bv.intoMemorySegment(msOutput, 0, ByteOrder.nativeOrder());
|
||||
// in-place copy, no resize
|
||||
expected = input;
|
||||
origin = 0;
|
||||
@ -152,10 +152,12 @@ public class VectorRebracket128Test {
|
||||
static void testRebracket128(IntFunction<byte[]> fa) {
|
||||
byte[] barr = fa.apply(128/Byte.SIZE);
|
||||
byte[] bout = new byte[barr.length];
|
||||
MemorySegment msin = MemorySegment.ofArray(barr);
|
||||
MemorySegment msout = MemorySegment.ofArray(bout);
|
||||
for (int i = 0; i < NUM_ITER; i++) {
|
||||
testVectorRebracket(bspec128, bspec128, barr, bout);
|
||||
testVectorRebracket(bspec128, sspec128, barr, bout);
|
||||
testVectorRebracket(bspec128, ispec128, barr, bout);
|
||||
testVectorRebracket(bspec128, bspec128, barr, bout, msin, msout);
|
||||
testVectorRebracket(bspec128, sspec128, barr, bout, msin, msout);
|
||||
testVectorRebracket(bspec128, ispec128, barr, bout, msin, msout);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user