8223347: Integration of Vector API (Incubator)

Co-authored-by: Vivek Deshpande <vdeshpande@openjdk.org>
Co-authored-by: Qi Feng <qfeng@openjdk.org>
Co-authored-by: Ian Graves <igraves@openjdk.org>
Co-authored-by: Jean-Philippe Halimi <jphalimi@openjdk.org>
Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Co-authored-by: Razvan Lupusoru <rlupusoru@openjdk.org>
Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Rahul Kandu <rkandu@openjdk.org>
Co-authored-by: Kishor Kharbas <kkharbas@openjdk.org>
Co-authored-by: Eric Liu <Eric.Liu2@arm.com>
Co-authored-by: Aaloan Miftah <someusername3@gmail.com>
Co-authored-by: John R Rose <jrose@openjdk.org>
Co-authored-by: Shravya Rukmannagari <srukmannagar@openjdk.org>
Co-authored-by: Paul Sandoz <psandoz@openjdk.org>
Co-authored-by: Sandhya Viswanathan <sviswanathan@openjdk.org>
Co-authored-by: Lauren Walkowski <lauren.walkowski@arm.com>
Co-authored-by: Yang Zhang <Yang.Zhang@arm.com>
Co-authored-by: Joshua Zhu <jzhu@openjdk.org>
Co-authored-by: Wang Zhuo <wzhuo@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: erikj, chegar, kvn, darcy, forax, briangoetz, aph, epavlova, coleenp

Parent: 386e7e8b73
Commit: 0c99b19258
@@ -59,6 +59,7 @@ BOOT_MODULES += \
     java.security.sasl \
     java.xml \
     jdk.incubator.foreign \
+    jdk.incubator.vector \
     jdk.internal.vm.ci \
     jdk.jfr \
     jdk.management \
@@ -145,6 +146,7 @@ DOCS_MODULES += \
     jdk.hotspot.agent \
     jdk.httpserver \
     jdk.incubator.jpackage \
+    jdk.incubator.vector \
     jdk.jartool \
     jdk.javadoc \
     jdk.jcmd \
@@ -138,6 +138,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)

  ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
    AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+      $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
      $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
    )))
  endif
@@ -106,7 +106,7 @@ public class Spp {
    static final String LNSEP = System.getProperty("line.separator");
    static final String KEY = "([a-zA-Z0-9]+)";
    static final String VAR = "([a-zA-Z0-9_\\-]+)";
-   static final String TEXT = "([a-zA-Z0-9&;,.<>/#() \\?\\[\\]\\$]+)"; // $ -- hack embedded $var$
+   static final String TEXT = "([\\p{Print}&&[^{#:}]]+)";

    static final int GN_NOT = 1;
    static final int GN_KEY = 2;

@@ -140,6 +140,10 @@ public class Spp {
                }
            }
        }
+       if (repl == null) {
+           System.err.println("Error: undefined variable in line " + ln);
+           System.exit(-1);
+       }
        vardef.appendReplacement(buf, repl);
    }
    vardef.appendTail(buf);
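The guard added above turns a silent null substitution into a hard error. A minimal Python sketch of Spp-style $var$ substitution with that guard (the helper name and dictionary are illustrative, not part of the real tool):

    import re, sys

    def substitute(line, ln, variables):
        # Replace each $var$ with its definition; abort on unknown names,
        # mirroring the new repl == null check in Spp.java.
        def repl(m):
            value = variables.get(m.group(1))
            if value is None:
                print("Error: undefined variable in line %d" % ln, file=sys.stderr)
                sys.exit(-1)
            return value
        return re.sub(r"\$([a-zA-Z0-9_\-]+)\$", repl, line)

    print(substitute("abstract class $Type$Vector", 1, {"Type": "Int"}))
    # abstract class IntVector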
@@ -1,4 +1,7 @@
+import os
import random
+import subprocess
+import sys

AARCH64_AS = "as"
AARCH64_OBJDUMP = "objdump"
@@ -129,6 +132,8 @@ class OperandFactory:

    _modes = {'x' : GeneralRegister,
              'w' : GeneralRegister,
+             'b' : FloatRegister,
+             'h' : FloatRegister,
              's' : FloatRegister,
              'd' : FloatRegister,
              'z' : FloatZero,
@@ -198,16 +203,16 @@ class InstructionWithModes(Instruction):
        self.isFloat = (mode == 'd') | (mode == 's')
        if self.isFloat:
            self.isWord = mode != 'd'
            self.asmRegPrefix = ["d", "s"][self.isWord]
        else:
            self.isWord = mode != 'x'
            self.asmRegPrefix = ["x", "w"][self.isWord]

    def name(self):
        return self._name + (self.mode if self.mode != 'x' else '')

    def aname(self):
        return (self._name+mode if (mode == 'b' or mode == 'h')
                else self._name)

class ThreeRegInstruction(Instruction):
@@ -220,17 +225,17 @@ class ThreeRegInstruction(Instruction):

    def cstr(self):
        return (super(ThreeRegInstruction, self).cstr()
                + ('%s, %s, %s'
                   % (self.reg[0],
                      self.reg[1], self.reg[2])))

    def astr(self):
        prefix = self.asmRegPrefix
        return (super(ThreeRegInstruction, self).astr()
                + ('%s, %s, %s'
                   % (self.reg[0].astr(prefix),
                      self.reg[1].astr(prefix), self.reg[2].astr(prefix))))

class FourRegInstruction(ThreeRegInstruction):

    def generate(self):
@@ -241,12 +246,12 @@ class FourRegInstruction(ThreeRegInstruction):

    def cstr(self):
        return (super(FourRegInstruction, self).cstr()
                + (', %s' % self.reg[3]))

    def astr(self):
        prefix = self.asmRegPrefix
        return (super(FourRegInstruction, self).astr()
                + (', %s' % self.reg[3].astr(prefix)))

class TwoRegInstruction(Instruction):

    def generate(self):
@@ -261,17 +266,17 @@ class TwoRegInstruction(Instruction):

    def astr(self):
        prefix = self.asmRegPrefix
        return (super(TwoRegInstruction, self).astr()
                + ('%s, %s'
                   % (self.reg[0].astr(prefix),
                      self.reg[1].astr(prefix))))

class TwoRegImmedInstruction(TwoRegInstruction):

    def generate(self):
        super(TwoRegImmedInstruction, self).generate()
        self.immed = random.randint(0, 1<<11 -1)
        return self

    def cstr(self):
        return (super(TwoRegImmedInstruction, self).cstr()
                + ', %su' % self.immed)
@@ -301,9 +306,9 @@ class ArithOp(ThreeRegInstruction):
        self.kind = ShiftKind().generate()
        self.distance = random.randint(0, (1<<5)-1 if self.isWord else (1<<6)-1)
        return self

    def cstr(self):
        return ('%s, Assembler::%s, %s);'
                % (ThreeRegInstruction.cstr(self),
                   self.kind.cstr(), self.distance))
@@ -314,9 +319,9 @@ class ArithOp(ThreeRegInstruction):
                   self.distance))

class AddSubCarryOp(ThreeRegInstruction):

    def cstr(self):
        return ('%s);'
                % (ThreeRegInstruction.cstr(self)))

class AddSubExtendedOp(ThreeRegInstruction):
@@ -332,76 +337,75 @@ class AddSubExtendedOp(ThreeRegInstruction):

    def cstr(self):
        return (super(AddSubExtendedOp, self).cstr()
                + (", ext::" + AddSubExtendedOp.optNames[self.option]
                   + ", " + str(self.amount) + ");"))

    def astr(self):
        return (super(AddSubExtendedOp, self).astr()
                + (", " + AddSubExtendedOp.optNames[self.option]
                   + " #" + str(self.amount)))

class AddSubImmOp(TwoRegImmedInstruction):

    def cstr(self):
        return super(AddSubImmOp, self).cstr() + ");"

class LogicalImmOp(AddSubImmOp):

    # These tables are legal immediate logical operands
    immediates32 \
        = [0x1, 0x3f, 0x1f0, 0x7e0,
           0x1c00, 0x3ff0, 0x8000, 0x1e000,
           0x3e000, 0x78000, 0xe0000, 0x100000,
           0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
           0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
           0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
           0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
           0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
           0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
           0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
           0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
           0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
           0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
           0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
           0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
           0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
           0xffffffbf, 0xfffffffd]

    immediates \
        = [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
           0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
           0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
           0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
           0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
           0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
           0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
           0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
           0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
           0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
           0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
           0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
           0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
           0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
           0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
           0xfffffffffc01ffff, 0xffffffffffc00003, 0xfffffffffffc000f, 0xffffffffffffe07f]

    def generate(self):
        AddSubImmOp.generate(self)
        self.immed = \
            self.immediates32[random.randint(0, len(self.immediates32)-1)] \
            if self.isWord else \
            self.immediates[random.randint(0, len(self.immediates)-1)]

        return self

    def astr(self):
        return (super(TwoRegImmedInstruction, self).astr()
                + ', #0x%x' % self.immed)

    def cstr(self):
        return super(AddSubImmOp, self).cstr() + "ll);"

class MultiOp():

    def multipleForms(self):
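The immediates32 and immediates tables above enumerate encodable AArch64 "logical immediates": a value qualifies only if it is a whole number of repetitions of a 2/4/8/16/32/64-bit element that is itself a rotation of a contiguous run of ones (all-zeros and all-ones are excluded). A quick Python checker of that rule (illustrative, not part of the generator):

    def is_logical_immediate(imm, regsize=64):
        # True if imm is an encodable AArch64 logical immediate.
        mask = (1 << regsize) - 1
        imm &= mask
        if imm == 0 or imm == mask:
            return False  # all-zeros / all-ones are reserved
        e = regsize
        while e >= 2:
            emask = (1 << e) - 1
            elt = imm & emask
            # the element must repeat across the whole register ...
            if all(((imm >> i) & emask) == elt for i in range(0, regsize, e)):
                # ... and be a rotation of a contiguous run of ones
                for r in range(e):
                    rot = ((elt >> r) | (elt << (e - r))) & emask
                    if rot == (1 << bin(elt).count("1")) - 1:
                        return True
            e >>= 1
        return False

    print(is_logical_immediate(0x9f9f9f9f, 32))  # True  (listed in immediates32)
    print(is_logical_immediate(0x12345678, 32))  # False (not encodable)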
@@ -422,9 +426,9 @@ class AbsOp(MultiOp, Instruction):
        return Instruction.astr(self) + "%s"

class RegAndAbsOp(MultiOp, Instruction):

    def multipleForms(self):
        if self.name() == "adrp":
            # We can only test one form of adrp because anything other
            # than "adrp ." requires relocs in the assembler output
            return 1
@@ -434,11 +438,11 @@ class RegAndAbsOp(MultiOp, Instruction):
        Instruction.generate(self)
        self.reg = GeneralRegister().generate()
        return self

    def cstr(self):
        if self.name() == "adrp":
            return "__ _adrp(" + "%s, %s);" % (self.reg, "%s")
        return (super(RegAndAbsOp, self).cstr()
                + "%s, %s);" % (self.reg, "%s"))

    def astr(self):
@@ -446,14 +450,14 @@ class RegAndAbsOp(MultiOp, Instruction):
                + self.reg.astr(self.asmRegPrefix) + ", %s")

class RegImmAbsOp(RegAndAbsOp):

    def cstr(self):
        return (Instruction.cstr(self)
                + "%s, %s, %s);" % (self.reg, self.immed, "%s"))

    def astr(self):
        return (Instruction.astr(self)
                + ("%s, #%s, %s"
                   % (self.reg.astr(self.asmRegPrefix), self.immed, "%s")))

    def generate(self):
@@ -462,7 +466,7 @@ class RegImmAbsOp(RegAndAbsOp):
        return self

class MoveWideImmOp(RegImmAbsOp):

    def multipleForms(self):
        return 0
@@ -472,8 +476,8 @@ class MoveWideImmOp(RegImmAbsOp):

    def astr(self):
        return (Instruction.astr(self)
                + ("%s, #%s, lsl %s"
                   % (self.reg.astr(self.asmRegPrefix),
                      self.immed, self.shift)))

    def generate(self):
@@ -486,7 +490,7 @@ class MoveWideImmOp(RegImmAbsOp):
        return self

class BitfieldOp(TwoRegInstruction):

    def cstr(self):
        return (Instruction.cstr(self)
                + ("%s, %s, %s, %s);"
@@ -513,16 +517,16 @@ class ExtractOp(ThreeRegInstruction):

    def cstr(self):
        return (ThreeRegInstruction.cstr(self)
                + (", %s);" % self.lsb))

    def astr(self):
        return (ThreeRegInstruction.astr(self)
                + (", #%s" % self.lsb))

class CondBranchOp(MultiOp, Instruction):

    def cstr(self):
        return "__ br(Assembler::" + self.name() + ", %s);"

    def astr(self):
        return "b." + self.name() + "\t%s"
@@ -530,10 +534,10 @@ class ImmOp(Instruction):

    def cstr(self):
        return "%s%s);" % (Instruction.cstr(self), self.immed)

    def astr(self):
        return Instruction.astr(self) + "#" + str(self.immed)

    def generate(self):
        self.immed = random.randint(0, 1<<16 -1)
        return self
@@ -542,6 +546,8 @@ class Op(Instruction):

    def cstr(self):
        return Instruction.cstr(self) + ");"
+    def astr(self):
+        return self.aname();

class SystemOp(Instruction):
@@ -573,11 +579,11 @@ class ConditionalCompareOp(TwoRegImmedInstruction):
        return self

    def cstr(self):
        return (super(ConditionalCompareOp, self).cstr() + ", "
                + "Assembler::" + conditionCodes[self.cond] + ");")

    def astr(self):
        return (super(ConditionalCompareOp, self).astr() +
                ", " + conditionCodes[self.cond])

class ConditionalCompareImmedOp(Instruction):
@@ -596,33 +602,33 @@ class ConditionalCompareImmedOp(Instruction):
                + "Assembler::" + conditionCodes[self.cond] + ");")

    def astr(self):
        return (Instruction.astr(self)
                + self.reg.astr(self.asmRegPrefix)
                + ", #" + str(self.immed)
                + ", #" + str(self.immed2)
                + ", " + conditionCodes[self.cond])

class TwoRegOp(TwoRegInstruction):

    def cstr(self):
        return TwoRegInstruction.cstr(self) + ");"

class ThreeRegOp(ThreeRegInstruction):

    def cstr(self):
        return ThreeRegInstruction.cstr(self) + ");"

class FourRegMulOp(FourRegInstruction):

    def cstr(self):
        return FourRegInstruction.cstr(self) + ");"

    def astr(self):
        isMaddsub = self.name().startswith("madd") | self.name().startswith("msub")
        midPrefix = self.asmRegPrefix if isMaddsub else "w"
        return (Instruction.astr(self)
                + self.reg[0].astr(self.asmRegPrefix)
                + ", " + self.reg[1].astr(midPrefix)
                + ", " + self.reg[2].astr(midPrefix)
                + ", " + self.reg[3].astr(self.asmRegPrefix))
@@ -638,8 +644,8 @@ class ConditionalSelectOp(ThreeRegInstruction):
                + "Assembler::" + conditionCodes[self.cond] + ");")

    def astr(self):
        return (ThreeRegInstruction.astr(self)
                + ", " + conditionCodes[self.cond])

class LoadStoreExclusiveOp(InstructionWithModes):
@@ -651,7 +657,7 @@ class LoadStoreExclusiveOp(InstructionWithModes):
        result = self.aname() + '\t'
        regs = list(self.regs)
        index = regs.pop() # The last reg is the index register
        prefix = ('x' if (self.mode == 'x')
                       & ((self.name().startswith("ld"))
                          | (self.name().startswith("stlr"))) # Ewww :-(
                  else 'w')
@@ -698,17 +704,17 @@ class LoadStoreExclusiveOp(InstructionWithModes):
        return self._name

class Address(object):

    base_plus_unscaled_offset, pre, post, base_plus_reg, \
        base_plus_scaled_offset, pcrel, post_reg, base_only = range(8)
    kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
             "base_plus_scaled_offset", "pcrel", "post_reg", "base_only"]
    extend_kinds = ["uxtw", "lsl", "sxtw", "sxtx"]

    @classmethod
    def kindToStr(cls, i):
        return cls.kinds[i]

    def generate(self, kind, shift_distance):
        self.kind = kind
        self.base = GeneralRegister().generate()
@@ -738,7 +744,7 @@ class Address(object):
            Address.pcrel: "",
            Address.base_plus_reg: "Address(%s, %s, Address::%s(%s))" \
                % (self.base, self.index, self.extend_kind, self.shift_distance),
            Address.base_plus_scaled_offset:
                "Address(%s, %s)" % (self.base, self.offset) } [self.kind]
        if (self.kind == Address.pcrel):
            result = ["__ pc()", "back", "forth"][self.offset]
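For concreteness, the base_plus_reg arm of Address.cstr() above renders its fields into a C++ Address constructor call; a standalone rendering with made-up register names:

    # Illustrative values only: base=r0, index=r1, extend_kind="uxtw", shift=2.
    base, index, extend_kind, shift_distance = "r0", "r1", "uxtw", 2
    print("Address(%s, %s, Address::%s(%s))"
          % (base, index, extend_kind, shift_distance))
    # Address(r0, r1, Address::uxtw(2))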
@@ -758,7 +764,7 @@ class Address(object):
            Address.base_only: "[%s]" % (self.base.astr(prefix)),
            Address.pcrel: "",
            Address.base_plus_reg: "[%s, %s, %s #%s]" \
                % (self.base.astr(prefix), self.index.astr(extend_prefix),
                   self.extend_kind, self.shift_distance),
            Address.base_plus_scaled_offset: \
                "[%s, %s]" \
@@ -767,7 +773,7 @@ class Address(object):
        if (self.kind == Address.pcrel):
            result = [".", "back", "forth"][self.offset]
        return result

class LoadStoreOp(InstructionWithModes):

    def __init__(self, args):
|
||||
class LoadStorePairOp(InstructionWithModes):
|
||||
|
||||
numRegs = 2
|
||||
|
||||
|
||||
def __init__(self, args):
|
||||
name, self.asmname, self.kind, mode = args
|
||||
InstructionWithModes.__init__(self, name, mode)
|
||||
self.offset = random.randint(-1<<4, 1<<4-1) << 4
|
||||
|
||||
|
||||
def generate(self):
|
||||
self.reg = [OperandFactory.create(self.mode).generate()
|
||||
self.reg = [OperandFactory.create(self.mode).generate()
|
||||
for i in range(self.numRegs)]
|
||||
self.base = OperandFactory.create('x').generate()
|
||||
kindStr = Address.kindToStr(self.kind);
|
||||
@@ -846,8 +852,8 @@ class LoadStorePairOp(InstructionWithModes):
        address = ["[%s, #%s]", "[%s, #%s]!", "[%s], #%s"][self.kind]
        address = address % (self.base.astr('x'), self.offset)
        result = "%s\t%s, %s, %s" \
            % (self.asmname,
               self.reg[0].astr(self.asmRegPrefix),
               self.reg[1].astr(self.asmRegPrefix), address)
        return result
@@ -875,7 +881,7 @@ class FloatInstruction(Instruction):
        Instruction.__init__(self, name)

    def generate(self):
        self.reg = [OperandFactory.create(self.modes[i]).generate()
                    for i in range(self.numRegs)]
        return self
@@ -884,7 +890,7 @@ class FloatInstruction(Instruction):
        return (formatStr
                % tuple([Instruction.cstr(self)] +
                        [str(self.reg[i]) for i in range(self.numRegs)])) # Yowza

    def astr(self):
        formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
        return (formatStr
@@ -985,7 +991,7 @@ class SVEReductionOp(Instruction):
                moreReg +
                [str(self.reg[2]) + self._width.astr()])

-class LdStSIMDOp(Instruction):
+class LdStNEONOp(Instruction):
    def __init__(self, args):
        self._name, self.regnum, self.arrangement, self.addresskind = args
@@ -1004,7 +1010,7 @@ class LdStNEONOp(Instruction):
        return self

    def cstr(self):
-       buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)
+       buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg)
        current = self._firstSIMDreg
        for cnt in range(1, self.regnum):
            buf = '%s, %s' % (buf, current.nextReg())
@@ -1022,6 +1028,57 @@ class LdStNEONOp(Instruction):

    def aname(self):
        return self._name

+class NEONReduceInstruction(Instruction):
+    def __init__(self, args):
+        self._name, self.insname, self.arrangement = args
+
+    def generate(self):
+        current = FloatRegister().generate()
+        self.dstSIMDreg = current
+        self.srcSIMDreg = current.nextReg()
+        return self
+
+    def cstr(self):
+        buf = Instruction.cstr(self) + str(self.dstSIMDreg)
+        buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg)
+        return buf
+
+    def astr(self):
+        buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower()))
+        buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement)
+        return buf
+
+    def aname(self):
+        return self._name
+
+class CommonNEONInstruction(Instruction):
+    def __init__(self, args):
+        self._name, self.insname, self.arrangement = args
+
+    def generate(self):
+        self._firstSIMDreg = FloatRegister().generate()
+        return self
+
+    def cstr(self):
+        buf = Instruction.cstr(self) + str(self._firstSIMDreg)
+        buf = '%s, __ T%s' % (buf, self.arrangement)
+        current = self._firstSIMDreg
+        for cnt in range(1, self.numRegs):
+            buf = '%s, %s' % (buf, current.nextReg())
+            current = current.nextReg()
+        return '%s);' % (buf)
+
+    def astr(self):
+        buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement)
+        current = self._firstSIMDreg
+        for cnt in range(1, self.numRegs):
+            buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement)
+            current = current.nextReg()
+        return buf
+
+    def aname(self):
+        return self._name

class SHA512SIMDOp(Instruction):

    def generate(self):
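To make the new NEONReduceInstruction emitters concrete: for the entry ["addv", "addv", "8B"], assuming the generator picks v0/v1 as destination and source and FloatRegister.astr(prefix) returns the prefix plus the register number, cstr() and astr() produce strings of this shape (a standalone sketch, not the real harness):

    name, insname, arrangement = "addv", "addv", "8B"
    # cstr(): the C++ MacroAssembler call compiled into the test.
    print('__ %s(v0, __ T%s, v1);' % (name, arrangement))
    # astr(): the matching GNU assembler line; the scalar result register
    # uses the element-size prefix ('b' for an 8B reduction).
    print('%s\tb0, v1.%s' % (insname, arrangement))
    # __ addv(v0, __ T8B, v1);
    # addv	b0, v1.8B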
@@ -1097,6 +1154,12 @@ class FloatConvertOp(TwoRegFloatOp):
    def cname(self):
        return self._cname

+class TwoRegNEONOp(CommonNEONInstruction):
+    numRegs = 2
+
+class ThreeRegNEONOp(TwoRegNEONOp):
+    numRegs = 3
+
class SpecialCases(Instruction):
    def __init__(self, data):
        self._name = data[0]
@@ -1129,6 +1192,7 @@ def generate(kind, names):

outfile = open("aarch64ops.s", "w")

+# To minimize the changes of assembler test code
random.seed(0)

print "// BEGIN Generated code -- do not edit"
@@ -1139,18 +1203,18 @@ print " __ bind(back);"

outfile.write("back:\n")

generate (ArithOp,
          [ "add", "sub", "adds", "subs",
            "addw", "subw", "addsw", "subsw",
            "and", "orr", "eor", "ands",
            "andw", "orrw", "eorw", "andsw",
            "bic", "orn", "eon", "bics",
            "bicw", "ornw", "eonw", "bicsw" ])

generate (AddSubImmOp,
          [ "addw", "addsw", "subw", "subsw",
            "add", "adds", "sub", "subs"])
generate (LogicalImmOp,
          [ "andw", "orrw", "eorw", "andsw",
            "and", "orr", "eor", "ands"])
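Each generate() call above expands into pairs of outputs, one C++ MacroAssembler call and one assembler line. The rough shape for a shifted-register ArithOp, with register numbers and shift distance made up for illustration (the real values come from the seeded RNG above):

    name, regs, kind, distance = "add", ("r0", "r1", "r2"), "LSL", 3
    print("__ %s(%s, %s, %s, Assembler::%s, %s);"
          % ((name,) + regs + (kind, distance)))    # emitted into the C++ test
    print("%s\tx0, x1, x2, %s #%s" % (name, kind.lower(), distance))  # emitted into aarch64ops.s
    # __ add(r0, r1, r2, Assembler::LSL, 3);
    # add	x0, x1, x2, lsl #3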
@@ -1191,26 +1255,26 @@ for mode in 'xw':
                              ["stxp", mode, 4], ["stlxp", mode, 4]])

for kind in range(6):
-   print "\n// " + Address.kindToStr(kind),
+   sys.stdout.write("\n// " + Address.kindToStr(kind))
    if kind != Address.pcrel:
        generate (LoadStoreOp,
                  [["str", "str", kind, "x"], ["str", "str", kind, "w"],
                   ["str", "strb", kind, "b"], ["str", "strh", kind, "h"],
                   ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
                   ["ldr", "ldrb", kind, "b"], ["ldr", "ldrh", kind, "h"],
                   ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
                   ["ldrsh", "ldrsh", kind, "w"], ["ldrsw", "ldrsw", kind, "x"],
                   ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
                   ["str", "str", kind, "d"], ["str", "str", kind, "s"],
                   ])
    else:
        generate (LoadStoreOp,
                  [["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"]])


for kind in (Address.base_plus_unscaled_offset, Address.pcrel, Address.base_plus_reg, \
             Address.base_plus_scaled_offset):
    generate (LoadStoreOp,
              [["prfm", "prfm\tPLDL1KEEP,", kind, "x"]])

generate(AddSubCarryOp, ["adcw", "adcsw", "sbcw", "sbcsw", "adc", "adcs", "sbc", "sbcs"])
@@ -1219,32 +1283,32 @@ generate(AddSubExtendedOp, ["addw", "addsw", "sub", "subsw", "add", "adds", "sub

generate(ConditionalCompareOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalCompareImmedOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalSelectOp,
         ["cselw", "csincw", "csinvw", "csnegw", "csel", "csinc", "csinv", "csneg"])

generate(TwoRegOp,
         ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
          "rev16", "rev32", "rev", "clz", "cls"])
generate(ThreeRegOp,
         ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
          "lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"])
generate(FourRegMulOp,
         ["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])

generate(ThreeRegFloatOp,
         [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
          ["fmuls", "sss"],
          ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
          ["fmuld", "ddd"]])

generate(FourRegFloatOp,
         [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
          ["fmaddd", "dddd"], ["fmsubd", "dddd"], ["fnmaddd", "dddd"], ["fnmaddd", "dddd"],])

generate(TwoRegFloatOp,
         [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
          ["fcvts", "ds"],
          ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
          ["fcvtd", "sd"],
          ])
@@ -1255,18 +1319,18 @@ generate(FloatConvertOp, [["fcvtzsw", "fcvtzs", "ws"], ["fcvtzs", "fcvtzs", "xs"
                          ["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"],
                          ["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]])

generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
                         ["fcmps", "sz"], ["fcmpd", "dz"]])

for kind in range(3):
    generate(LoadStorePairOp, [["stp", "stp", kind, "w"], ["ldp", "ldp", kind, "w"],
                               ["ldpsw", "ldpsw", kind, "x"],
                               ["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"]
                               ])
generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"],
                           ["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]])

-generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
+generate(LdStNEONOp, [["ld1", 1, "8B", Address.base_only],
                      ["ld1", 2, "16B", Address.post],
                      ["ld1", 3, "1D", Address.post_reg],
                      ["ld1", 4, "8H", Address.post],
@@ -1290,6 +1354,93 @@ generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
                      ["ld4r", 4, "2S", Address.post_reg],
                      ])

+generate(NEONReduceInstruction,
+         [["addv", "addv", "8B"], ["addv", "addv", "16B"],
+          ["addv", "addv", "4H"], ["addv", "addv", "8H"],
+          ["addv", "addv", "4S"],
+          ["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
+          ["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
+          ["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
+          ["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
+          ["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
+          ["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
+          ])
+
+generate(TwoRegNEONOp,
+         [["absr", "abs", "8B"], ["absr", "abs", "16B"],
+          ["absr", "abs", "4H"], ["absr", "abs", "8H"],
+          ["absr", "abs", "2S"], ["absr", "abs", "4S"],
+          ["absr", "abs", "2D"],
+          ["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"],
+          ["fabs", "fabs", "2D"],
+          ["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"],
+          ["fneg", "fneg", "2D"],
+          ["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"],
+          ["fsqrt", "fsqrt", "2D"],
+          ["notr", "not", "8B"], ["notr", "not", "16B"],
+          ])
+
+generate(ThreeRegNEONOp,
+         [["andr", "and", "8B"], ["andr", "and", "16B"],
+          ["orr", "orr", "8B"], ["orr", "orr", "16B"],
+          ["eor", "eor", "8B"], ["eor", "eor", "16B"],
+          ["addv", "add", "8B"], ["addv", "add", "16B"],
+          ["addv", "add", "4H"], ["addv", "add", "8H"],
+          ["addv", "add", "2S"], ["addv", "add", "4S"],
+          ["addv", "add", "2D"],
+          ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
+          ["fadd", "fadd", "2D"],
+          ["subv", "sub", "8B"], ["subv", "sub", "16B"],
+          ["subv", "sub", "4H"], ["subv", "sub", "8H"],
+          ["subv", "sub", "2S"], ["subv", "sub", "4S"],
+          ["subv", "sub", "2D"],
+          ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
+          ["fsub", "fsub", "2D"],
+          ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
+          ["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
+          ["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
+          ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
+          ["fmul", "fmul", "2D"],
+          ["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
+          ["mlav", "mla", "2S"], ["mlav", "mla", "4S"],
+          ["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"],
+          ["fmla", "fmla", "2D"],
+          ["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"],
+          ["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"],
+          ["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"],
+          ["fmls", "fmls", "2D"],
+          ["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"],
+          ["fdiv", "fdiv", "2D"],
+          ["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
+          ["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
+          ["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
+          ["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
+          ["fmax", "fmax", "2D"],
+          ["minv", "smin", "8B"], ["minv", "smin", "16B"],
+          ["minv", "smin", "4H"], ["minv", "smin", "8H"],
+          ["minv", "smin", "2S"], ["minv", "smin", "4S"],
+          ["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
+          ["fmin", "fmin", "2D"],
+          ["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
+          ["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
+          ["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
+          ["cmeq", "cmeq", "2D"],
+          ["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
+          ["fcmeq", "fcmeq", "2D"],
+          ["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
+          ["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
+          ["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
+          ["cmgt", "cmgt", "2D"],
+          ["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
+          ["fcmgt", "fcmgt", "2D"],
+          ["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
+          ["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
+          ["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
+          ["cmge", "cmge", "2D"],
+          ["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
+          ["fcmge", "fcmge", "2D"],
+          ])

generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])

generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
@@ -1344,9 +1495,9 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
                        ])

print "\n// FloatImmediateOp"
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
              "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
              "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
              "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
    astr = "fmov d0, #" + float
    cstr = "__ fmovd(v0, " + float + ");"
@@ -1414,16 +1565,11 @@ outfile.write("forth:\n")

outfile.close()

-import subprocess
-import sys
-
# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])

print
-print "/*"
+print "/*",
+sys.stdout.flush()
subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
print "*/"

subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
@@ -1444,4 +1590,7 @@ while i < len(bytes):
print "\n };"
print "// END Generated code -- do not edit"

infile.close()

+for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin"]:
+    os.remove(f)
@@ -2410,6 +2410,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
      break;
    case Op_MulVL:
      return false;
+   case Op_VectorLoadShuffle:
+   case Op_VectorRearrange:
+     if (vlen < 4) {
+       return false;
+     }
+     break;
    default:
      break;
  }
@@ -2421,6 +2427,10 @@ const bool Matcher::has_predicated_vectors(void) {
  return UseSVE > 0;
}

+bool Matcher::supports_vector_variable_shifts(void) {
+  return true;
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}
@@ -2466,11 +2476,18 @@ const int Matcher::min_vector_size(const BasicType bt) {
  if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
    // Currently vector length less than SVE vector register size is not supported.
    return max_size;
- } else {
-   // For the moment limit the vector size to 8 bytes with NEON.
+ } else { // NEON
+   // Limit the vector size to 8 bytes
    int size = 8 / type2aelembytes(bt);
+   if (bt == T_BYTE) {
+     // To support vector api shuffle/rearrange.
+     size = 4;
+   } else if (bt == T_BOOLEAN) {
+     // To support vector api load/store mask.
+     size = 2;
+   }
    if (size < 2) size = 2;
-   return size;
+   return MIN2(size,max_size);
  }
}
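A quick model of what the NEON branch above now returns, with element sizes hard-coded the way type2aelembytes would report them (a sketch, not HotSpot code):

    # Minimal model of the NEON arm of Matcher::min_vector_size().
    def min_vector_size_neon(bt, elem_bytes, max_size):
        size = 8 // elem_bytes
        if bt == "T_BYTE":
            size = 4          # vector api shuffle/rearrange needs at least 4 lanes
        elif bt == "T_BOOLEAN":
            size = 2          # vector api load/store mask needs at least 2 lanes
        size = max(size, 2)
        return min(size, max_size)

    print(min_vector_size_neon("T_BYTE", 1, 16))  # 4 (was 8 before this change)
    print(min_vector_size_neon("T_INT", 4, 16))   # 2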
@@ -2489,6 +2506,9 @@ const uint Matcher::vector_ideal_reg(int len) {
    return Op_VecA;
  }
  switch(len) {
+   // For 16-bit/32-bit mask vector, reuse VecD.
+   case 2:
+   case 4:
    case 8: return Op_VecD;
    case 16: return Op_VecX;
  }
@@ -3131,6 +3151,12 @@ encode %{
  // END Non-volatile memory access

  // Vector loads and stores
+ enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{
+   FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+   loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
+      $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
@@ -3149,6 +3175,12 @@ encode %{
      $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

+ enc_class aarch64_enc_strvH(vecD src, memory mem) %{
+   FloatRegister src_reg = as_FloatRegister($src$$reg);
+   loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
+      $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
@@ -4252,6 +4284,26 @@ operand immI_31()
  interface(CONST_INTER);
%}

+operand immI_2()
+%{
+  predicate(n->get_int() == 2);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_4()
+%{
+  predicate(n->get_int() == 4);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
operand immI_8()
%{
  predicate(n->get_int() == 8);
@@ -11222,6 +11274,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
%}

// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4

// This pattern is automatically generated from aarch64_ad.m4
@@ -16848,6 +16901,7 @@ instruct replicate2D(vecX dst, vRegD src)

instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
@@ -16867,6 +16921,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp,

instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp);
@@ -16885,6 +16940,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iReg

instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
@@ -16904,6 +16960,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)

instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp, TEMP dst);
@@ -17985,8 +18042,7 @@ instruct vabs2F(vecD dst, vecD src)
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (2S)" %}
  ins_encode %{
-   __ fabs(as_FloatRegister($dst$$reg), __ T2S,
-           as_FloatRegister($src$$reg));
+   __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
@@ -17998,8 +18054,7 @@ instruct vabs4F(vecX dst, vecX src)
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (4S)" %}
  ins_encode %{
-   __ fabs(as_FloatRegister($dst$$reg), __ T4S,
-           as_FloatRegister($src$$reg));
+   __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
@@ -18011,8 +18066,7 @@ instruct vabs2D(vecX dst, vecX src)
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (2D)" %}
  ins_encode %{
-   __ fabs(as_FloatRegister($dst$$reg), __ T2D,
-           as_FloatRegister($src$$reg));
+   __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
@@ -18153,7 +18207,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2)

// ------------------------------ Shift ---------------------------------------
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
-  predicate(n->as_Vector()->length_in_bytes() == 8);
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
@@ -18977,12 +19032,12 @@ instruct vpopcount4I(vecX dst, vecX src) %{
            "uaddlp $dst, $dst\t# vector (8H)"
  %}
  ins_encode %{
    __ cnt(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
@@ -18996,12 +19051,12 @@ instruct vpopcount2I(vecD dst, vecD src) %{
            "uaddlp $dst, $dst\t# vector (4H)"
  %}
  ins_encode %{
    __ cnt(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
              as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
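The three-instruction sequence above computes a per-lane population count without a dedicated vector popcount instruction: cnt produces per-byte bit counts, and each uaddlp pairwise-adds adjacent lanes into the next wider element size. The same arithmetic in plain Python:

    # Model of cnt + uaddlp + uaddlp for 32-bit lanes.
    def popcount_lanes(words):
        byts = [b for w in words for b in w.to_bytes(4, "little")]
        cnt = [bin(b).count("1") for b in byts]                    # cnt: per byte
        hw  = [cnt[i] + cnt[i+1] for i in range(0, len(cnt), 2)]   # uaddlp: bytes -> halfwords
        return [hw[i] + hw[i+1] for i in range(0, len(hw), 2)]     # uaddlp: halfwords -> words

    print(popcount_lanes([0xDEADBEEF, 0x0F0F0F0F]))  # [24, 16]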
@@ -1,4 +1,4 @@
-dnl Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+dnl Copyright (c) 2019, 2020, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it

@@ -19,10 +19,14 @@ dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
dnl
-dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
-dnl and shift patterns patterns used in aarch64.ad.
-dnl
+dnl Process this file with m4 aarch64_ad.m4 to generate instructions used in
+dnl aarch64.ad:
+dnl 1. the arithmetic
+dnl 2. shift patterns
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4
+dnl
define(`ORL2I', `ifelse($1,I,orL2I)')
dnl
src/hotspot/cpu/aarch64/aarch64_neon.ad    | 3456 lines | new file (diff suppressed because it is too large)
src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 | 1424 lines | new file (diff suppressed because it is too large)
(one further file diff suppressed because it is too large)
@@ -1371,6 +1371,21 @@ public:

#undef INSN

+#define INSN(NAME, size, opc)                                           \
+  void NAME(FloatRegister Rt, Register Rn) {                            \
+    starti;                                                             \
+    f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21);     \
+    f(0, 20, 12), f(0b01, 11, 10);                                      \
+    rf(Rn, 5), rf((Register)Rt, 0);                                     \
+  }
+
+  INSN(ldrs, 0b10, 0b01);
+  INSN(ldrd, 0b11, 0b01);
+  INSN(ldrq, 0b00, 0b11);
+
+#undef INSN
+
#define INSN(NAME, opc, V)                                              \
  void NAME(address dest, prfop op = PLDL1KEEP) {                       \
    int64_t offset = (dest - pc()) >> 2;                                \
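A quick way to sanity-check a field layout like the new ldrs/ldrd/ldrq one above is to fold the f() calls into plain shifts. A hypothetical Python helper mirroring those calls for ldrq, assuming rf(R, lsb) places the 5-bit register number at bit lsb (here Rt = q0, Rn = x1):

    def encode_ldrq(rt, rn):
        word = 0
        def f(val, msb, lsb):
            # pack val into bits msb..lsb, as the assembler's f() does
            nonlocal word
            word |= (val & ((1 << (msb - lsb + 1)) - 1)) << lsb
        f(0b00, 31, 30); f(0b111100, 29, 24); f(0b11, 23, 22); f(0, 21)
        f(0, 20, 12); f(0b01, 11, 10)
        f(rn, 9, 5); f(rt, 4, 0)
        return word

    print(hex(encode_ldrq(0, 1)))  # 0x3cc00420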
@@ -1508,6 +1523,21 @@ public:

#undef INSN

+  /* SIMD extensions
+   *
+   * We just use FloatRegister in the following. They are exactly the same
+   * as SIMD registers.
+   */
+public:
+
+  enum SIMD_Arrangement {
+    T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
+  };
+
+  enum SIMD_RegVariant {
+    B, H, S, D, Q
+  };
+
  enum shift_kind { LSL, LSR, ASR, ROR };

  void op_shifted_reg(unsigned decode,
@@ -1887,6 +1917,30 @@ public:
    i_fmovs(Vd, Vn);
  }

+private:
+  void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
+                           FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
+    assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
+           || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
+    starti;
+    int op30 = (do_extend ? Tb : Ta) & 1;
+    int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
+    f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
+    f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+public:
+  void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+    assert(Tb == T4H || Tb == T8H || Tb == T2S || Tb == T4S, "invalid arrangement");
+    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
+  }
+
+  void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+    assert(Ta == T4H || Ta == T8H || Ta == T2S || Ta == T4S, "invalid arrangement");
+    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
+  }
+
#undef INSN

// Floating-point data-processing (2 source)
@@ -2023,6 +2077,43 @@ public:

#undef INSN

+  enum sign_kind { SIGNED, UNSIGNED };
+
+private:
+  void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
+                             FloatRegister Rd, FloatRegister Rn) {
+    starti;
+    f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
+    f(0b111100, 28, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
+    rf(Rn, 5), rf(Rd, 0);
+  }
+
+public:
+#define INSN(NAME, sign, sz)                        \
+  void NAME(FloatRegister Rd, FloatRegister Rn) {   \
+    _xcvtf_scalar_integer(sign, sz, Rd, Rn);        \
+  }
+
+  INSN(scvtfs, SIGNED, 0);
+  INSN(scvtfd, SIGNED, 1);
+
+#undef INSN
+
+private:
+  void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
+                             FloatRegister Rd, FloatRegister Rn) {
+    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
+    starti;
+    f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
+    f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
+    rf(Rn, 5), rf(Rd, 0);
+  }
+
+public:
+  void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
+    _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
+  }
+
// Floating-point compare
void float_compare(unsigned op31, unsigned type,
                   unsigned op, unsigned op2,
@@ -2152,21 +2243,6 @@ public:
  INSN(frintzd, 0b01, 0b011);
#undef INSN

-  /* SIMD extensions
-   *
-   * We just use FloatRegister in the following. They are exactly the same
-   * as SIMD registers.
-   */
-public:
-
-  enum SIMD_Arrangement {
-    T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
-  };
-
-  enum SIMD_RegVariant {
-    B, H, S, D, Q
-  };
-
private:
  static short SIMD_Size_in_bytes[];
@@ -2324,6 +2400,11 @@ public:
  INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+  INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+  INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+  INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D

#undef INSN
@@ -2343,6 +2424,8 @@ public:
  INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
  INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+  INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+  INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@@ -2407,6 +2490,9 @@ public:
  INSN(fmls, 0, 1, 0b110011);
  INSN(fmax, 0, 0, 0b111101);
  INSN(fmin, 0, 1, 0b111101);
+  INSN(fcmeq, 0, 0, 0b111001);
+  INSN(fcmgt, 1, 1, 0b111001);
+  INSN(fcmge, 1, 0, 0b111001);

#undef INSN
@@ -2506,10 +2592,20 @@ public:
    rf(Vn, 5), rf(Vd, 0);
  }

- // (double) {a, b} -> (a + b)
- void faddpd(FloatRegister Vd, FloatRegister Vn) {
+ // (long) {a, b} -> (a + b)
+ void addpd(FloatRegister Vd, FloatRegister Vn) {
    starti;
-   f(0b0111111001110000110110, 31, 10);
+   f(0b0101111011110001101110, 31, 10);
    rf(Vn, 5), rf(Vd, 0);
  }

+ // (Floating-point) {a, b} -> (a + b)
+ void faddp(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
+   assert(type == D || type == S, "Wrong type for faddp");
+   starti;
+   f(0b011111100, 31, 23);
+   f(type == D ? 1 : 0, 22);
+   f(0b110000110110, 21, 10);
+   rf(Vn, 5), rf(Vd, 0);
+ }
+
@@ -2576,29 +2672,48 @@ public:
#undef INSN

private:
- void _ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    starti;
    /* The encodings for the immh:immb fields (bits 22:16) are
-    * 0001 xxx 8H, 8B/16b shift = xxx
+    * 0001 xxx 8H, 8B/16B shift = xxx
     * 001x xxx 4S, 4H/8H shift = xxxx
     * 01xx xxx 2D, 2S/4S shift = xxxxx
     * 1xxx xxx RESERVED
     */
    assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
    assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
-   f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
+   f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
+   f((1 << ((Tb>>1)+3))|shift, 22, 16);
    f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

public:
  void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
-   _ushll(Vd, Ta, Vn, Tb, shift);
+   _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
  }

  void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
-   _ushll(Vd, Ta, Vn, Tb, shift);
+   _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
  }

  void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
    ushll(Vd, Ta, Vn, Tb, 0);
  }

+ void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+   assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
+   _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+   assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
+   _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+   sshll(Vd, Ta, Vn, Tb, 0);
+ }
+
// Move from general purpose register
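The immh:immb table in the comment above is easy to check numerically: the 7-bit field is (1 << ((Tb >> 1) + 3)) | shift, with the arrangement ordinals T8B=0, T16B=1, T4H=2, T8H=3, T2S=4, T4S=5 from SIMD_Arrangement. A worked check in Python:

    T8B, T16B, T4H, T8H, T2S, T4S = range(6)

    def immh_immb(Tb, shift):
        assert (1 << ((Tb >> 1) + 3)) > shift, "Invalid shift value"
        return (1 << ((Tb >> 1) + 3)) | shift

    print(format(immh_immb(T8B, 3), "07b"))   # 0001011 -> row "0001 xxx", shift = 3
    print(format(immh_immb(T2S, 17), "07b"))  # 0110001 -> row "01xx xxx", shift = 17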
@@ -2649,6 +2764,15 @@ public:
    f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
  }

+ void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
+   starti;
+   int size_b = (int)Tb >> 1;
+   int size_a = (int)Ta >> 1;
+   assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
+   f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
+   f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
+ }
+
  void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
  {
    starti;
@ -611,6 +611,16 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
|
||||
|
||||
// Generate indices for iota vector.
|
||||
address generate_iota_indices(const char *stub_name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", stub_name);
|
||||
address start = __ pc();
|
||||
__ emit_data64(0x0706050403020100, relocInfo::none);
|
||||
__ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
|
||||
return start;
|
||||
}
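
On a little-endian host (true of all HotSpot aarch64 targets), the two constants above lay down the byte sequence 0..15 in ascending memory order, i.e. one lane index per byte. A minimal standalone check (plain C++, independent of HotSpot):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      const uint64_t lo = 0x0706050403020100ULL;
      const uint64_t hi = 0x0F0E0D0C0B0A0908ULL;
      uint8_t bytes[16];
      std::memcpy(bytes, &lo, 8);      // little-endian: least significant byte first
      std::memcpy(bytes + 8, &hi, 8);
      for (int i = 0; i < 16; i++) {
        std::printf("%d ", bytes[i]);  // prints: 0 1 2 ... 15
      }
      std::printf("\n");
      return 0;
    }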

  // The inner part of zero_words().  This is the bulk operation,
  // zeroing words in blocks, possibly using DC ZVA to do it.  The
  // caller is responsible for zeroing the last few words.
@ -5958,6 +5968,8 @@ class StubGenerator: public StubCodeGenerator {
                               SharedRuntime::
                               throw_NullPointerException_at_call));

    StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();


@ -40,6 +40,7 @@ address StubRoutines::aarch64::_f2i_fixup = NULL;
address StubRoutines::aarch64::_f2l_fixup = NULL;
address StubRoutines::aarch64::_d2i_fixup = NULL;
address StubRoutines::aarch64::_d2l_fixup = NULL;
address StubRoutines::aarch64::_vector_iota_indices = NULL;
address StubRoutines::aarch64::_float_sign_mask = NULL;
address StubRoutines::aarch64::_float_sign_flip = NULL;
address StubRoutines::aarch64::_double_sign_mask = NULL;

@ -51,6 +51,7 @@ class aarch64 {
  static address _d2i_fixup;
  static address _d2l_fixup;

  static address _vector_iota_indices;
  static address _float_sign_mask;
  static address _float_sign_flip;
  static address _double_sign_mask;
@ -106,6 +107,10 @@ class aarch64 {
    return _d2l_fixup;
  }

  static address vector_iota_indices() {
    return _vector_iota_indices;
  }

  static address float_sign_mask()
  {
    return _float_sign_mask;

@ -993,6 +993,10 @@ const bool Matcher::has_predicated_vectors(void) {
  return false;
}

bool Matcher::supports_vector_variable_shifts(void) {
  return VM_Version::has_simd();
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

@ -2161,6 +2161,10 @@ const bool Matcher::has_predicated_vectors(void) {
  return false;
}

bool Matcher::supports_vector_variable_shifts(void) {
  return false; // not supported
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

@ -1573,6 +1573,10 @@ const bool Matcher::has_predicated_vectors(void) {
  return false;
}

bool Matcher::supports_vector_variable_shifts(void) {
  return false; // not supported
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

File diff suppressed because it is too large
@ -588,6 +588,7 @@ class Assembler : public AbstractAssembler {
#endif
  };

  // Comparison predicates for integral types & FP types when using SSE
  enum ComparisonPredicate {
    eq = 0,
    lt = 1,
@ -599,6 +600,51 @@ class Assembler : public AbstractAssembler {
    _true = 7
  };

  // Comparison predicates for FP types when using AVX
  // O means ordered, U means unordered: with an ordered predicate, any comparison involving NaN is false; with an unordered one, it is true.
  // S means signaling, Q means non-signaling: a signaling predicate makes the instruction signal #IA on NaN.
  enum ComparisonPredicateFP {
    EQ_OQ = 0,
    LT_OS = 1,
    LE_OS = 2,
    UNORD_Q = 3,
    NEQ_UQ = 4,
    NLT_US = 5,
    NLE_US = 6,
    ORD_Q = 7,
    EQ_UQ = 8,
    NGE_US = 9,
    NGT_US = 0xA,
    FALSE_OQ = 0xB,
    NEQ_OQ = 0xC,
    GE_OS = 0xD,
    GT_OS = 0xE,
    TRUE_UQ = 0xF,
    EQ_OS = 0x10,
    LT_OQ = 0x11,
    LE_OQ = 0x12,
    UNORD_S = 0x13,
    NEQ_US = 0x14,
    NLT_UQ = 0x15,
    NLE_UQ = 0x16,
    ORD_S = 0x17,
    EQ_US = 0x18,
    NGE_UQ = 0x19,
    NGT_UQ = 0x1A,
    FALSE_OS = 0x1B,
    NEQ_OS = 0x1C,
    GE_OQ = 0x1D,
    GT_OQ = 0x1E,
    TRUE_US = 0x1F
};
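
The ordered/unordered split only matters when a NaN is involved: ordered predicates compare false on NaN, unordered ones compare true. A scalar model of two of the predicates above (plain C++, illustrative only, not HotSpot API):

    #include <cmath>
    #include <cstdio>

    static bool eq_oq(double a, double b) {   // ordered equal: false on NaN
      // isnan checks shown for clarity; '==' is already false on NaN.
      return !std::isnan(a) && !std::isnan(b) && a == b;
    }
    static bool neq_uq(double a, double b) {  // unordered not-equal: true on NaN
      return std::isnan(a) || std::isnan(b) || a != b;
    }

    int main() {
      const double nan = std::nan("");
      std::printf("%d %d\n", eq_oq(nan, 1.0), neq_uq(nan, 1.0));  // prints: 0 1
      std::printf("%d %d\n", eq_oq(1.0, 1.0), neq_uq(1.0, 2.0));  // prints: 1 1
      return 0;
    }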

  enum Width {
    B = 0,
    W = 1,
    D = 2,
    Q = 3
  };

  //---< calculate length of instruction >---
  // As instruction size can't be found out easily on x86/x64,
  // we just use '4' for len and maxlen.
@ -918,6 +964,7 @@ private:
  void adcq(Register dst, Register src);

  void addb(Address dst, int imm8);
  void addw(Register dst, Register src);
  void addw(Address dst, int imm16);

  void addl(Address dst, int32_t imm32);
@ -968,6 +1015,8 @@ private:
  void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void andw(Register dst, Register src);

  void andl(Address dst, int32_t imm32);
  void andl(Register dst, int32_t imm32);
  void andl(Register dst, Address src);
@ -1093,9 +1142,11 @@ private:

  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
  void cvtss2sd(XMMRegister dst, XMMRegister src);
@ -1111,8 +1162,25 @@ private:
  void cvttss2sil(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);

  // Convert vector double to int
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  // Convert vector float and double
  void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
  void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector long to vector FP
  void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Evex casts with truncation
  void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);

  //Abs of packed Integer values
  void pabsb(XMMRegister dst, XMMRegister src);
  void pabsw(XMMRegister dst, XMMRegister src);
@ -1472,20 +1540,26 @@ private:
  void vmovdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 512bit Vector
  void evmovdqub(Address dst, XMMRegister src, int vector_len);
  void evmovdqub(XMMRegister dst, Address src, int vector_len);
  void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evmovdquw(Address dst, XMMRegister src, int vector_len);
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evmovdquw(XMMRegister dst, Address src, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqul(Address dst, XMMRegister src, int vector_len);
  void evmovdqul(XMMRegister dst, Address src, int vector_len);
  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquq(Address dst, XMMRegister src, int vector_len);
  void evmovdquq(XMMRegister dst, Address src, int vector_len);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  // Generic move instructions.
  void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type);
@ -1521,6 +1595,9 @@ private:
  // Move Quadword
  void movq(Address dst, XMMRegister src);
  void movq(XMMRegister dst, Address src);
  void movq(XMMRegister dst, XMMRegister src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);

  void movsbl(Register dst, Address src);
  void movsbl(Register dst, Register src);
@ -1601,6 +1678,8 @@ private:
  void btrq(Address dst, int imm8);
#endif

  void orw(Register dst, Register src);

  void orl(Address dst, int32_t imm32);
  void orl(Register dst, int32_t imm32);
  void orl(Register dst, Address src);
@ -1614,17 +1693,32 @@ private:
  void orq(Register dst, Address src);
  void orq(Register dst, Register src);

  // Pack with signed saturation
  void packsswb(XMMRegister dst, XMMRegister src);
  void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void packssdw(XMMRegister dst, XMMRegister src);
  void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Pack with unsigned saturation
  void packuswb(XMMRegister dst, XMMRegister src);
  void packuswb(XMMRegister dst, Address src);
  void packusdw(XMMRegister dst, XMMRegister src);
  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Pemutation of 64bit words
  // Permutations
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermq(XMMRegister dst, XMMRegister src, int imm8);
  void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
  void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
  void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void pause();
@ -1637,11 +1731,14 @@ private:
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);

  void pcmpeqb(XMMRegister dst, XMMRegister src);
  void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);

  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

@ -1654,16 +1751,22 @@ private:
  void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void pcmpeqd(XMMRegister dst, XMMRegister src);
  void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void pcmpeqq(XMMRegister dst, XMMRegister src);
  void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
  void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void pcmpgtq(XMMRegister dst, XMMRegister src);
  void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void pmovmskb(Register dst, XMMRegister src);
  void vpmovmskb(Register dst, XMMRegister src);

@ -1672,6 +1775,7 @@ private:
  void pextrq(Register dst, XMMRegister src, int imm8);
  void pextrd(Address dst, XMMRegister src, int imm8);
  void pextrq(Address dst, XMMRegister src, int imm8);
  void pextrb(Register dst, XMMRegister src, int imm8);
  void pextrb(Address dst, XMMRegister src, int imm8);
  // SSE 2 extract
  void pextrw(Register dst, XMMRegister src, int imm8);
@ -1680,21 +1784,46 @@ private:
  // SSE 4.1 insert
  void pinsrd(XMMRegister dst, Register src, int imm8);
  void pinsrq(XMMRegister dst, Register src, int imm8);
  void pinsrb(XMMRegister dst, Register src, int imm8);
  void pinsrd(XMMRegister dst, Address src, int imm8);
  void pinsrq(XMMRegister dst, Address src, int imm8);
  void pinsrb(XMMRegister dst, Address src, int imm8);
  void insertps(XMMRegister dst, XMMRegister src, int imm8);
  // SSE 2 insert
  void pinsrw(XMMRegister dst, Register src, int imm8);
  void pinsrw(XMMRegister dst, Address src, int imm8);

  // SSE4.1 packed move
  // AVX insert
  void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);

  // Zero extend moves
  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);

  void pmovzxbd(XMMRegister dst, XMMRegister src);
  void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
  void pmovzxdq(XMMRegister dst, XMMRegister src);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);

  // Sign extend moves
  void pmovsxbd(XMMRegister dst, XMMRegister src);
  void pmovsxbq(XMMRegister dst, XMMRegister src);
  void pmovsxbw(XMMRegister dst, XMMRegister src);
  void pmovsxwd(XMMRegister dst, XMMRegister src);
  void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);

  void evpmovwb(Address dst, XMMRegister src, int vector_len);
  void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);

@ -1702,10 +1831,6 @@ private:

  void evpmovdb(Address dst, XMMRegister src, int vector_len);

  // Sign extend moves
  void pmovsxbw(XMMRegister dst, XMMRegister src);
  void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);

  // Multiply add
  void pmaddwd(XMMRegister dst, XMMRegister src);
  void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -1749,10 +1874,17 @@ private:
  void pshufd(XMMRegister dst, Address src, int mode);
  void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);

  // Shuffle Packed Low Words
  // Shuffle Packed High/Low Words
  void pshufhw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
  void pshuflw(XMMRegister dst, Address src, int mode);

  //shuffle floats and doubles
  void pshufps(XMMRegister, XMMRegister, int);
  void pshufpd(XMMRegister, XMMRegister, int);
  void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
  void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);

  // Shuffle packed values at 128 bit granularity
  void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);

@ -1768,6 +1900,9 @@ private:
  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, Address src);

  // Vector compare
  void vptest(XMMRegister dst, XMMRegister src, int vector_len);

  // Interleave Low Bytes
  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src);
@ -1841,6 +1976,7 @@ private:
  void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

  void pblendw(XMMRegister dst, XMMRegister src, int imm8);
  void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);

  void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
  void sha1nexte(XMMRegister dst, XMMRegister src);
@ -1959,6 +2095,7 @@ private:
  void xorl(Register dst, Register src);

  void xorb(Register dst, Address src);
  void xorw(Register dst, Register src);

  void xorq(Register dst, Address src);
  void xorq(Register dst, Register src);
@ -1997,6 +2134,8 @@ private:


  //====================VECTOR ARITHMETIC=====================================
  void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
  void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);

  // Add Packed Floating-Point Values
  void addpd(XMMRegister dst, XMMRegister src);
@ -2106,13 +2245,41 @@ private:
  // Multiply packed integers (only shorts and ints)
  void pmullw(XMMRegister dst, XMMRegister src);
  void pmulld(XMMRegister dst, XMMRegister src);
  void pmuludq(XMMRegister dst, XMMRegister src);
  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Minimum of packed integers
  void pminsb(XMMRegister dst, XMMRegister src);
  void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void pminsw(XMMRegister dst, XMMRegister src);
  void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void pminsd(XMMRegister dst, XMMRegister src);
  void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void minps(XMMRegister dst, XMMRegister src);
  void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void minpd(XMMRegister dst, XMMRegister src);
  void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

  // Maximum of packed integers
  void pmaxsb(XMMRegister dst, XMMRegister src);
  void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void pmaxsw(XMMRegister dst, XMMRegister src);
  void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void pmaxsd(XMMRegister dst, XMMRegister src);
  void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void maxps(XMMRegister dst, XMMRegister src);
  void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void maxpd(XMMRegister dst, XMMRegister src);
  void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

  // Shift left packed integers
  void psllw(XMMRegister dst, int shift);
  void pslld(XMMRegister dst, int shift);
@ -2154,9 +2321,22 @@ private:
  void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Variable shift left packed integers
  void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Variable shift right packed integers
  void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Variable shift right arithmetic packed integers
  void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

@ -2164,6 +2344,7 @@ private:
  void pand(XMMRegister dst, XMMRegister src);
  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Andn packed integers
@ -2176,10 +2357,15 @@ private:
  void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  // Xor packed integers
  void pxor(XMMRegister dst, XMMRegister src);
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

@ -2257,7 +2443,21 @@ private:
  void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

  void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
  // Gather AVX2 and AVX3
  void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);

  //Scatter AVX3 only
  void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);

  // Carry-Less Multiplication Quadword
  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
@ -2270,14 +2470,56 @@ private:
  // runtime code and native libraries.
  void vzeroupper();

  // AVX support for vectorized conditional move (float/double). The following two instructions are used only as a coupled pair.
  void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
  void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
  void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
  // Vector double compares
  void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
  void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               ComparisonPredicateFP comparison, int vector_len);

  // Vector float compares
  void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
  void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               ComparisonPredicateFP comparison, int vector_len);

  // Vector integer compares
  void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len);
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
               int comparison, int vector_len);

  // Vector long compares
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len);
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
               int comparison, int vector_len);

  // Vector byte compares
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len);
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
               int comparison, int vector_len);

  // Vector short compares
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len);
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
               int comparison, int vector_len);

  // Vector blends
  void blendvps(XMMRegister dst, XMMRegister src);
  void blendvpd(XMMRegister dst, XMMRegister src);
  void pblendvb(XMMRegister dst, XMMRegister src);
  void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
  void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
  void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
  void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
 protected:
  // Next instructions require address alignment 16 bytes SSE mode.
  // They should be called only from corresponding MacroAssembler instructions.
@ -2373,7 +2615,8 @@ public:
  // Internal encoding data used in compressed immediate offset programming
  void set_evex_encoding(int value) { _evex_encoding = value; }

  // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
  // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
  // This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
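
In other words, with the Z bit set, lanes whose mask bit is clear are zeroed; after reset_is_clear_context() they keep the previous destination value instead. A scalar model of the two masking modes (plain C++, illustrative only, not HotSpot API):

    #include <cstdio>

    // One lane of an EVEX masked move; 'zero' models the Evex.Z bit.
    static int masked_lane(bool mask_bit, bool zero, int dst, int src) {
      if (mask_bit) return src;   // selected lane: always written
      return zero ? 0 : dst;      // masked-off lane: zeroed or merged
    }

    int main() {
      std::printf("%d\n", masked_lane(false, true,  7, 5));  // 0 (zeroing)
      std::printf("%d\n", masked_lane(false, false, 7, 5));  // 7 (merge)
      std::printf("%d\n", masked_lane(true,  true,  7, 5));  // 5
      return 0;
    }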

  // Map back to current assembler so that we can manage object level association

File diff suppressed because it is too large
@ -28,6 +28,8 @@
// C2_MacroAssembler contains high-level macros for C2

 public:
  Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);

  // special instructions for EVEX
  void setvectmask(Register dst, Register src);
  void restorevectmask();
@ -71,25 +73,69 @@ public:
  void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
  void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);

  void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
               XMMRegister tmp = xnoreg);
  void vpminmax(int opcode, BasicType elem_bt,
                XMMRegister dst, XMMRegister src1, XMMRegister src2,
                int vlen_enc);

  void vminmax_fp(int opcode, BasicType elem_bt,
                  XMMRegister dst, XMMRegister a, XMMRegister b,
                  XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
                  int vlen_enc);
  void evminmax_fp(int opcode, BasicType elem_bt,
                   XMMRegister dst, XMMRegister a, XMMRegister b,
                   KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
                   int vlen_enc);

  void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
  void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
  void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

  void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftd_imm(int opcode, XMMRegister dst, int shift);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
  void vshiftq_imm(int opcode, XMMRegister dst, int shift);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

  // Reductions for vectors of ints, longs, floats, and doubles.
  void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
  void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
  void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
  void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);

  // dst = src1 + reduce(op, src2) using vtmp as temps
  void insert(BasicType typ, XMMRegister dst, Register val, int idx);
  void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
  void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
  void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
  void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

  // extract
  void extract(BasicType typ, Register dst, XMMRegister src, int idx);
  XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
  void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);

  // blend
  void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
  void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

  void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
  void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);

  // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

  // dst = src1 reduce(op, src2) using vtmp as temps
  void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
  void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@ -99,32 +145,62 @@ public:
  void reduce_fp(int opcode, int vlen,
                 XMMRegister dst, XMMRegister src,
                 XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
  void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
                         XMMRegister dst, XMMRegister src,
                         XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
  void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
                          XMMRegister dst, XMMRegister src,
                          XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
 private:
  void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Int Reduction
  void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Byte Reduction
  void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Short Reduction
  void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);

  // Long Reduction
#ifdef _LP64
  void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64

  // Float Reduction
  void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  // Double Reduction
  void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
  void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
  void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);

  void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
  void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  // Base reduction instruction
  void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
  void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);

 public:


@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}


void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
@ -2495,6 +2496,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {

void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  if (dst->encoding() == src->encoding()) return;
  Assembler::movdqu(dst, src);
}

@ -2519,6 +2521,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {

void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
  if (dst->encoding() == src->encoding()) return;
  Assembler::vmovdqu(dst, src);
}

@ -2532,6 +2535,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
  }
}


void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
  if (reachable(src)) {
    kmovwl(dst, as_Address(src));
  } else {
    lea(scratch_reg, src);
    kmovwl(dst, Address(scratch_reg, 0));
  }
}

void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    if (mask == k0) {
      Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
    } else {
      Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
    }
  } else {
    lea(scratch_reg, src);
    if (mask == k0) {
      Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
    } else {
      Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
    }
  }
}

void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
  }
}

void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
  }
}

void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
                               int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
  }
}

void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  if (reachable(src)) {
    Assembler::evmovdquq(dst, as_Address(src), vector_len);
@ -3019,6 +3080,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
  Assembler::vpcmpeqw(dst, nds, src, vector_len);
}

void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
                               AddressLiteral src, int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
  }
}

void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
  }
}

void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
  }
}

void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
  }
}

void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
                             int comparison, int vector_len, Register scratch_reg) {
  if (reachable(src)) {
    Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
  } else {
    lea(scratch_reg, src);
    Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
  }
}

void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
  if (width == Assembler::Q) {
    Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
  } else {
    Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
  }
}

void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
  int eq_cond_enc = 0x29;
  int gt_cond_enc = 0x37;
  if (width != Assembler::Q) {
    eq_cond_enc = 0x74 + width;
    gt_cond_enc = 0x64 + width;
  }
  switch (cond) {
  case eq:
    vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
    break;
  case neq:
    vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
    break;
  case le:
    vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
    break;
  case nlt:
    vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
    vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
    break;
  case lt:
    vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
    break;
  case nle:
    vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
    break;
  default:
    assert(false, "Should not reach here");
  }
}
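
Only eq and gt exist as native SSE/AVX integer compare encodings, so the remaining predicates are synthesized by swapping operands and/or XOR-ing the result mask with all-ones. The identities, checked on scalar lanes (plain C++, illustrative only, not HotSpot API):

    #include <cassert>

    static bool gt(int a, int b) { return a > b; }  // the only native compare besides eq

    int main() {
      for (int a = -2; a <= 2; a++) {
        for (int b = -2; b <= 2; b++) {
          assert((a != b) == !(a == b));  // neq = eq, then XOR with all-ones
          assert((a <= b) == !gt(a, b));  // le  = gt, then XOR with all-ones
          assert((a <  b) ==  gt(b, a));  // lt  = gt with operands swapped
          assert((a >= b) == !gt(b, a));  // nlt = swapped gt, then XOR with all-ones
        }
      }
      return 0;
    }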
|
||||
|
||||
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||
Assembler::vpmovzxbw(dst, src, vector_len);
|
||||
@ -3143,6 +3296,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
|
||||
bool merge, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
|
||||
} else {
|
||||
lea(scratch_reg, src);
|
||||
Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
|
||||
if (reachable(src)) {
|
||||
vdivsd(dst, nds, as_Address(src));
|
||||
@ -3239,7 +3402,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------
|
||||
void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
Assembler::vpermd(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(scratch_reg, src);
|
||||
Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
|
||||
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
|
||||
@ -5765,7 +5935,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
||||
|
||||
bind(VECTOR64_LOOP);
|
||||
// AVX512 code to compare 64 byte vectors.
|
||||
evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
|
||||
evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
kortestql(k7, k7);
|
||||
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
|
||||
@ -5784,7 +5954,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
|
||||
notq(tmp2);
|
||||
kmovql(k3, tmp2);
|
||||
|
||||
evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
|
||||
evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
|
||||
|
||||
ktestql(k7, k3);
@ -7579,7 +7749,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
  notl(result);
  kmovdl(k3, result);

  evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
  evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
  evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
  ktestd(k2, k3);
  jcc(Assembler::carryClear, return_zero);
@ -7604,7 +7774,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
  negptr(len);

  bind(copy_32_loop);
  evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
  evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
  evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
  kortestdl(k2, k2);
  jcc(Assembler::carryClear, return_zero);
@ -7629,7 +7799,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le

  kmovdl(k3, result);

  evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
  evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
  evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
  ktestd(k2, k3);
  jcc(Assembler::carryClear, return_zero);
@ -7774,7 +7944,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
  // inflate 32 chars per iter
  bind(copy_32_loop);
  vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
  evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
  evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
  addptr(len, 32);
  jcc(Assembler::notZero, copy_32_loop);

@ -7789,7 +7959,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
  notl(tmp3_aliased);
  kmovdl(k2, tmp3_aliased);
  evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
  evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
  evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
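  // A masked store has to merge: EVEX stores to memory leave elements
  // outside the mask untouched (zero-masking is not available for a memory
  // destination), so merge == true is the only valid choice here.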

  jmp(done);
  bind(avx3_threshold);

@ -1076,15 +1076,59 @@ public:
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);

  void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
  void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
  void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
  void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  // AVX Unaligned forms
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

  // AVX512 Unaligned
  void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
  void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
  void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
  void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
    if (dst->encoding() == src->encoding()) return;
    Assembler::evmovdqul(dst, src, vector_len);
  }
  void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
  void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
  void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
    if (dst->encoding() == src->encoding() && mask == k0) return;
    Assembler::evmovdqul(dst, mask, src, merge, vector_len);
  }
  void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
  void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
  void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
    if (dst->encoding() == src->encoding()) return;
    Assembler::evmovdquq(dst, src, vector_len);
  }
  void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
  void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
  void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
    if (dst->encoding() == src->encoding() && mask == k0) return;
    Assembler::evmovdquq(dst, mask, src, merge, vector_len);
  }
  void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
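  // Note: the XMM-to-XMM overloads above elide the move when source and
  // destination are the same register and no real masking is in effect
  // (k0 selects all lanes), so redundant self-moves cost nothing.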

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@ -1206,6 +1250,30 @@ public:
  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);

  // Vector compares
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, vector_len); }
  void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, int vector_len, Register scratch_reg);
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, vector_len); }
  void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, int vector_len, Register scratch_reg);
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, vector_len); }
  void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, int vector_len, Register scratch_reg);
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
               int comparison, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, vector_len); }
  void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
               int comparison, int vector_len, Register scratch_reg);


  // Emit comparison instruction for the specified comparison predicate.
  void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
  void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);

  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
@ -1234,6 +1302,7 @@ public:
  void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);

  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }

  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
@ -1252,6 +1321,8 @@ public:
  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

  void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
@ -1307,6 +1378,9 @@ public:
  void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
  void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }

  void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
  void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);

  void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
    if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
      Assembler::vinserti32x4(dst, dst, src, imm8);

@ -587,6 +587,29 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_iota_indices(const char *stub_name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();
    __ emit_data(0x03020100, relocInfo::none, 0);
    __ emit_data(0x07060504, relocInfo::none, 0);
    __ emit_data(0x0B0A0908, relocInfo::none, 0);
    __ emit_data(0x0F0E0D0C, relocInfo::none, 0);
    __ emit_data(0x13121110, relocInfo::none, 0);
    __ emit_data(0x17161514, relocInfo::none, 0);
    __ emit_data(0x1B1A1918, relocInfo::none, 0);
    __ emit_data(0x1F1E1D1C, relocInfo::none, 0);
    __ emit_data(0x23222120, relocInfo::none, 0);
    __ emit_data(0x27262524, relocInfo::none, 0);
    __ emit_data(0x2B2A2928, relocInfo::none, 0);
    __ emit_data(0x2F2E2D2C, relocInfo::none, 0);
    __ emit_data(0x33323130, relocInfo::none, 0);
    __ emit_data(0x37363534, relocInfo::none, 0);
    __ emit_data(0x3B3A3938, relocInfo::none, 0);
    __ emit_data(0x3F3E3D3C, relocInfo::none, 0);
    return start;
  }
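  // The 64 bytes emitted above are the identity byte permutation
  // 0x00..0x3F ("iota"); loaded as a vector it provides ascending lane
  // indices for shuffles and index computations.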

  address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
@ -627,6 +650,40 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
    __ emit_data(val0, relocInfo::none, 0);
    __ emit_data(val1, relocInfo::none, 0);
    __ emit_data(val2, relocInfo::none, 0);
    __ emit_data(val3, relocInfo::none, 0);
    if (len >= Assembler::AVX_256bit) {
      __ emit_data(val4, relocInfo::none, 0);
      __ emit_data(val5, relocInfo::none, 0);
      __ emit_data(val6, relocInfo::none, 0);
      __ emit_data(val7, relocInfo::none, 0);
      if (len >= Assembler::AVX_512bit) {
        __ emit_data(val8, relocInfo::none, 0);
        __ emit_data(val9, relocInfo::none, 0);
        __ emit_data(val10, relocInfo::none, 0);
        __ emit_data(val11, relocInfo::none, 0);
        __ emit_data(val12, relocInfo::none, 0);
        __ emit_data(val13, relocInfo::none, 0);
        __ emit_data(val14, relocInfo::none, 0);
        __ emit_data(val15, relocInfo::none, 0);
      }
    }

    return start;
  }
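  // The nested guards above emit only as many 32-bit words as the requested
  // vector width needs: 4 for 128-bit, 8 for 256-bit, 16 for 512-bit.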

  //----------------------------------------------------------------------------------------------------
  // Non-destructive plausibility checks for oops

@ -3902,8 +3959,19 @@ class StubGenerator: public StubCodeGenerator {
    StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF);
    StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000);
    StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff);
    StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff);
    StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff);
    StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0, 0, 0);
    StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
    StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x03020100);
    StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x01000100);
    StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double("vector_long_shuffle_mask", 0x00000001, 0x0);
    StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
    StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
    StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
    StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
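    // vector_32_bit_mask and vector_64_bit_mask are 512-bit constants with
    // only the low 32 (resp. 64) bits set, emitted via the custom-i32 stub.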

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

@ -809,6 +809,21 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_iota_indices(const char *stub_name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();
    __ emit_data64(0x0706050403020100, relocInfo::none);
    __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
    __ emit_data64(0x1716151413121110, relocInfo::none);
    __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none);
    __ emit_data64(0x2726252423222120, relocInfo::none);
    __ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
    __ emit_data64(0x3736353433323130, relocInfo::none);
    __ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
    return start;
  }
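  // Same identity byte table (0x00..0x3F) as the 32-bit stub generator,
  // emitted here in 64-bit chunks.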

  address generate_fp_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
@ -854,6 +869,57 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);
    __ emit_data64(mask, relocInfo::none);

    return start;
  }
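  // Eight 64-bit copies broadcast the mask across a full 512-bit constant
  // area, so the same stub serves 128-, 256-, and 512-bit vector loads.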

  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address start = __ pc();

    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
    __ emit_data(val0, relocInfo::none, 0);
    __ emit_data(val1, relocInfo::none, 0);
    __ emit_data(val2, relocInfo::none, 0);
    __ emit_data(val3, relocInfo::none, 0);
    if (len >= Assembler::AVX_256bit) {
      __ emit_data(val4, relocInfo::none, 0);
      __ emit_data(val5, relocInfo::none, 0);
      __ emit_data(val6, relocInfo::none, 0);
      __ emit_data(val7, relocInfo::none, 0);
      if (len >= Assembler::AVX_512bit) {
        __ emit_data(val8, relocInfo::none, 0);
        __ emit_data(val9, relocInfo::none, 0);
        __ emit_data(val10, relocInfo::none, 0);
        __ emit_data(val11, relocInfo::none, 0);
        __ emit_data(val12, relocInfo::none, 0);
        __ emit_data(val13, relocInfo::none, 0);
        __ emit_data(val14, relocInfo::none, 0);
        __ emit_data(val15, relocInfo::none, 0);
      }
    }

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
@ -6769,9 +6835,20 @@ address generate_avx_ghash_processBlocks() {
    StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
    StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
    StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
    StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
    StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
    StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
    StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
    StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
    StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0, 0, 0);
    StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
                                                                        0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
    StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
    StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
    StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
    StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
    StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

@ -44,12 +44,21 @@ address StubRoutines::x86::_upper_word_mask_addr = NULL;
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_vector_short_to_byte_mask = NULL;
address StubRoutines::x86::_vector_int_to_byte_mask = NULL;
address StubRoutines::x86::_vector_int_to_short_mask = NULL;
address StubRoutines::x86::_vector_all_bits_set = NULL;
address StubRoutines::x86::_vector_short_shuffle_mask = NULL;
address StubRoutines::x86::_vector_int_shuffle_mask = NULL;
address StubRoutines::x86::_vector_long_shuffle_mask = NULL;
address StubRoutines::x86::_vector_float_sign_mask = NULL;
address StubRoutines::x86::_vector_float_sign_flip = NULL;
address StubRoutines::x86::_vector_double_sign_mask = NULL;
address StubRoutines::x86::_vector_double_sign_flip = NULL;
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
address StubRoutines::x86::_vector_long_sign_mask = NULL;
address StubRoutines::x86::_vector_iota_indices = NULL;
address StubRoutines::x86::_vector_32_bit_mask = NULL;
address StubRoutines::x86::_vector_64_bit_mask = NULL;
#ifdef _LP64
address StubRoutines::x86::_k256_W_adr = NULL;
address StubRoutines::x86::_k512_W_addr = NULL;

@ -146,8 +146,17 @@ class x86 {
  static address _vector_float_sign_flip;
  static address _vector_double_sign_mask;
  static address _vector_double_sign_flip;
  static address _vector_byte_perm_mask;
  static address _vector_long_sign_mask;
  static address _vector_all_bits_set;
  static address _vector_byte_perm_mask;
  static address _vector_int_to_byte_mask;
  static address _vector_int_to_short_mask;
  static address _vector_32_bit_mask;
  static address _vector_64_bit_mask;
  static address _vector_int_shuffle_mask;
  static address _vector_short_shuffle_mask;
  static address _vector_long_shuffle_mask;
  static address _vector_iota_indices;
#ifdef _LP64
  static juint _k256_W[];
  static address _k256_W_adr;
@ -248,13 +257,50 @@ class x86 {
    return _vector_double_sign_flip;
  }

  static address vector_all_bits_set() {
    return _vector_all_bits_set;
  }

  static address vector_byte_perm_mask() {
    return _vector_byte_perm_mask;
  }

  static address vector_int_to_byte_mask() {
    return _vector_int_to_byte_mask;
  }

  static address vector_int_to_short_mask() {
    return _vector_int_to_short_mask;
  }

  static address vector_32_bit_mask() {
    return _vector_32_bit_mask;
  }

  static address vector_64_bit_mask() {
    return _vector_64_bit_mask;
  }

  static address vector_int_shuffle_mask() {
    return _vector_int_shuffle_mask;
  }

  static address vector_short_shuffle_mask() {
    return _vector_short_shuffle_mask;
  }

  static address vector_long_shuffle_mask() {
    return _vector_long_shuffle_mask;
  }

  static address vector_long_sign_mask() {
    return _vector_long_sign_mask;
  }

  static address vector_iota_indices() {
    return _vector_iota_indices;
  }

#ifdef _LP64
  static address k256_W_addr() { return _k256_W_adr; }
  static address k512_W_addr() { return _k512_W_addr; }

File diff suppressed because it is too large
@ -3315,7 +3315,7 @@ operand immI() %{
%}

// Constant for test vs zero
operand immI0() %{
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

@ -3325,7 +3325,7 @@ operand immI0() %{
%}

// Constant for increment
operand immI1() %{
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

@ -3419,15 +3419,6 @@ operand immI_32_63() %{
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);
@ -3446,6 +3437,26 @@ operand immI_3() %{
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
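// immI_4 and immI_8 match those exact constants; patterns elsewhere in the
// matcher can key on them where a specific element size or scale factor is
// required (their consumers live in other parts of this file).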

// Pointer Immediate
operand immP() %{
  match(ConP);
@ -3815,6 +3826,18 @@ operand eRegP() %{
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
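// rRegP mirrors the operand name used by x86_64.ad, so instruct patterns
// shared between the 32- and 64-bit ports can refer to a pointer register
// uniformly.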

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
@ -3947,6 +3970,15 @@ operand eADXRegL_low_only() %{
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}
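// Like rRegP above, rFlagsReg adopts the 64-bit port's name for the flags
// register, so shared patterns need not distinguish it from eFlagsReg.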

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
@ -4077,6 +4109,14 @@ operand regF() %{
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
@ -4096,6 +4136,14 @@ operand regD() %{
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}
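// The float_reg_legacy/double_reg_legacy classes restrict allocation to the
// legacy XMM registers (xmm0-xmm15), for instructions that have no EVEX
// encoding and therefore cannot address xmm16-xmm31.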

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);
@ -5846,6 +5894,46 @@ instruct loadKlass(eRegP dst, memory mem) %{
  ins_pipe( ialu_reg_mem );
%}

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
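// The four Move*LEG instructs above let the register allocator shuttle
// float and double values between the general and legacy XMM register
// classes (see the "# if src != dst" format comments).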

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
@ -5971,7 +6059,7 @@ instruct loadConI(rRegI dst, immI src) %{
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

@ -7083,7 +7171,7 @@ instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);
@ -7183,7 +7271,7 @@ instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

@ -7552,7 +7640,7 @@ instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

@ -7563,7 +7651,7 @@ instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  ins_pipe( ialu_reg_reg );
%}

instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

@ -8017,7 +8105,7 @@ instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlag

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

@ -8053,7 +8141,7 @@ instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

@ -8065,7 +8153,7 @@ instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR    $dst,$shift" %}
@ -8110,7 +8198,7 @@ instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

@ -8266,7 +8354,7 @@ instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1
  ins_pipe(ialu_reg_mem);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
@ -8279,7 +8367,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
@ -8431,7 +8519,7 @@ instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
@ -8460,7 +8548,7 @@ instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
@ -8479,7 +8567,7 @@ instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
@ -8497,7 +8585,7 @@ instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
@ -8526,7 +8614,7 @@ instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
@ -8545,7 +8633,7 @@ instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
@ -8713,7 +8801,7 @@ instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);
@ -8827,7 +8915,7 @@ instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);
@ -11979,7 +12067,7 @@ instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  ins_pipe( ialu_cr_reg_mem );
%}

instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

@ -11989,7 +12077,7 @@ instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
@ -11998,7 +12086,7 @@ instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
@ -12048,7 +12136,7 @@ instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
//   ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
@ -12125,7 +12213,7 @@ instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
@ -13496,7 +13584,7 @@ instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}

@ -2871,7 +2871,7 @@ operand immI()
%}

// Constant for test vs zero
operand immI0()
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);
@ -2882,7 +2882,7 @@ operand immI0()
%}

// Constant for increment
operand immI1()
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);
@ -2903,6 +2903,36 @@ operand immI_M1()
  interface(CONST_INTER);
%}

operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2()
%{
@ -5217,19 +5247,19 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "blendvps         $btmp,$b,$a,$b        \n\t"
     "blendvps         $atmp,$a,$b,$b        \n\t"
     "vblendvps        $btmp,$b,$a,$b        \n\t"
     "vblendvps        $atmp,$a,$b,$b        \n\t"
     "vmaxss           $tmp,$atmp,$btmp      \n\t"
     "cmpps.unordered  $btmp,$atmp,$atmp     \n\t"
     "blendvps         $dst,$tmp,$atmp,$btmp \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp     \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
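// The sequence above implements Java Math.max semantics for floats: the
// sign-based blends order -0.0 below +0.0, vmaxss picks the larger value,
// and the final unordered compare + blend propagates NaN operands
// unchanged. The max/min variants below follow the same scheme.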
@ -5253,19 +5283,19 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
  format %{
     "blendvpd         $btmp,$b,$a,$b        \n\t"
     "blendvpd         $atmp,$a,$b,$b        \n\t"
     "vblendvpd        $btmp,$b,$a,$b        \n\t"
     "vblendvpd        $atmp,$a,$b,$b        \n\t"
     "vmaxsd           $tmp,$atmp,$btmp      \n\t"
     "cmppd.unordered  $btmp,$atmp,$atmp     \n\t"
     "blendvpd         $dst,$tmp,$atmp,$btmp \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp     \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
@ -5289,19 +5319,19 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "blendvps         $atmp,$a,$b,$a        \n\t"
     "blendvps         $btmp,$b,$a,$a        \n\t"
     "vblendvps        $atmp,$a,$b,$a        \n\t"
     "vblendvps        $btmp,$b,$a,$a        \n\t"
     "vminss           $tmp,$atmp,$btmp      \n\t"
     "cmpps.unordered  $btmp,$atmp,$atmp     \n\t"
     "blendvps         $dst,$tmp,$atmp,$btmp \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp     \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
@ -5325,19 +5355,19 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "blendvpd         $atmp,$a,$b,$a        \n\t"
     "blendvpd         $btmp,$b,$a,$a        \n\t"
     "vblendvpd        $atmp,$a,$b,$a        \n\t"
     "vblendvpd        $btmp,$b,$a,$a        \n\t"
     "vminsd           $tmp,$atmp,$btmp      \n\t"
     "cmppd.unordered  $btmp,$atmp,$atmp     \n\t"
     "blendvpd         $dst,$tmp,$atmp,$btmp \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp     \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
@ -5561,7 +5591,7 @@ instruct loadConI(rRegI dst, immI src)
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);
@ -5997,7 +6027,7 @@ instruct storeImmNKlass(memory mem, immNKlass src)
%}

// Store Integer Immediate
instruct storeImmI0(memory mem, immI0 zero)
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreI mem zero));
@ -6047,7 +6077,7 @@ instruct storeImmL(memory mem, immL32 src)
%}

// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI0 zero)
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreC mem zero));
@ -6073,7 +6103,7 @@ instruct storeImmI16(memory mem, immI16 src)
%}

// Store Byte Immediate
instruct storeImmB0(memory mem, immI0 zero)
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreB mem zero));
@ -6098,7 +6128,7 @@ instruct storeImmB(memory mem, immI8 src)
%}

// Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI0 zero)
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreCM mem zero));
@ -6111,7 +6141,7 @@ instruct storeImmCM0_reg(memory mem, immI0 zero)
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmCM0(memory mem, immI0 src)
instruct storeImmCM0(memory mem, immI_0 src)
%{
  match(Set mem (StoreCM mem src));

@ -7196,7 +7226,7 @@ instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
  ins_pipe(ialu_mem_imm);
%}

instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
@ -7208,7 +7238,7 @@ instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
  ins_pipe(ialu_reg);
%}

instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
@ -8091,7 +8121,7 @@ instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)

// Subtract from a pointer
// XXX hmpf???
instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);
@ -8102,7 +8132,7 @@ instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
  ins_pipe(ialu_reg_reg);
%}

instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero dst));
  effect(KILL cr);
@ -8113,7 +8143,19 @@ instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
  ins_pipe(ialu_reg);
%}

instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (NegI dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
@ -8135,6 +8177,18 @@ instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
  ins_pipe(ialu_reg);
%}

instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  match(Set dst (NegL dst));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
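// negl/negq negate in place and set the condition codes, which is why the
// NegI/NegL forms above kill the flags register.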
|
||||
|
||||
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (StoreL dst (SubL zero (LoadL dst))));
|
||||
@ -8460,7 +8514,7 @@ instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
|
||||
|
||||
// Integer Shift Instructions
|
||||
// Shift Left by one
|
||||
instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
instruct salI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (LShiftI dst shift));
|
||||
effect(KILL cr);
|
||||
@ -8472,7 +8526,7 @@ instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Shift Left by one
|
||||
instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
|
||||
instruct salI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
|
||||
effect(KILL cr);
|
||||
@ -8532,7 +8586,7 @@ instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Arithmetic shift right by one
|
||||
instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
instruct sarI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (RShiftI dst shift));
|
||||
effect(KILL cr);
|
||||
@ -8544,7 +8598,7 @@ instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Arithmetic shift right by one
|
||||
instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
|
||||
instruct sarI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
|
||||
effect(KILL cr);
|
||||
@ -8604,7 +8658,7 @@ instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Logical shift right by one
|
||||
instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
instruct shrI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (URShiftI dst shift));
|
||||
effect(KILL cr);
|
||||
@ -8616,7 +8670,7 @@ instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Logical shift right by one
|
||||
instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
|
||||
instruct shrI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
|
||||
effect(KILL cr);
|
||||
@ -8677,7 +8731,7 @@ instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
|
||||
|
||||
// Long Shift Instructions
|
||||
// Shift Left by one
|
||||
instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
|
||||
instruct salL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dst (LShiftL dst shift));
|
||||
effect(KILL cr);
|
||||
@ -8689,7 +8743,7 @@ instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// Shift Left by one
instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct salL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);
@ -8750,7 +8804,7 @@ instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}

// Arithmetic shift right by one
instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
instruct sarL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
@ -8762,7 +8816,7 @@ instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}

// Arithmetic shift right by one
instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct sarL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);
@ -8823,7 +8877,7 @@ instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}

// Logical shift right by one
instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
instruct shrL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
@ -8835,7 +8889,7 @@ instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}

// Logical shift right by one
instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
instruct shrL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);
@ -9207,7 +9261,7 @@ instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
@ -9220,7 +9274,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
@ -9903,7 +9957,7 @@ instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);
@ -11250,7 +11304,7 @@ instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);
@ -11359,7 +11413,7 @@ instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

@ -11369,7 +11423,7 @@ instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

@ -11379,7 +11433,7 @@ instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

@ -11433,7 +11487,7 @@ instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
// //   ins_encode( OpcP, reg_mem( op1, op2) );
// //%}

instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

@ -11771,7 +11825,7 @@ instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI0 zero)
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

@ -11781,7 +11835,7 @@ instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI0 zero)
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero)
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

@ -12504,7 +12558,7 @@ instruct tlsLoadP(r15_RegP dst) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);

@ -268,6 +268,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
  if( strcmp(opType,"LoadRange")==0 )  return Form::idealI;
  if( strcmp(opType,"LoadS")==0 )  return Form::idealS;
  if( strcmp(opType,"LoadVector")==0 )  return Form::idealV;
  if( strcmp(opType,"LoadVectorGather")==0 )  return Form::idealV;
  assert( strcmp(opType,"Load") != 0, "Must type Loads" );
  return Form::none;
}
@ -284,6 +285,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const {
  if( strcmp(opType,"StoreN")==0) return Form::idealN;
  if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass;
  if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
  if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV;
  assert( strcmp(opType,"Store") != 0, "Must type Stores" );
  return Form::none;
}

@ -3484,7 +3484,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
    "StoreB","StoreC","Store" ,"StoreFP",
    "LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load"   ,
    "StoreVector", "LoadVector",
    "StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter",
    "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
    "LoadPLocked",
    "StorePConditional", "StoreIConditional", "StoreLConditional",
@ -3801,6 +3801,7 @@ void MatchNode::count_commutative_op(int& count) {
    "MaxV", "MinV",
    "MulI","MulL","MulF","MulD",
    "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
    "MinV","MaxV",
    "OrI","OrL",
    "OrV",
    "XorI","XorL",
@ -4151,8 +4152,9 @@ bool MatchRule::is_vector() const {
    "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
    "CMoveVD", "CMoveVF",
    "DivVF","DivVD",
    "MinV","MaxV",
    "AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
    "NegVF","NegVD",
    "NegVF","NegVD","NegVI",
    "SqrtVD","SqrtVF",
    "AndV" ,"XorV" ,"OrV",
    "MaxV", "MinV",
@ -4169,6 +4171,12 @@ bool MatchRule::is_vector() const {
    "URShiftVB","URShiftVS","URShiftVI","URShiftVL",
    "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
    "RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
    "LoadVectorGather", "StoreVectorScatter",
    "VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
    "VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
    "VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
    "VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
    "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret",
    "FmaVD", "FmaVF","PopCountVI",
    // Next are not supported currently.
    "PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",

@ -222,6 +222,7 @@ static bool trust_final_non_static_fields(ciInstanceKlass* holder) {
  // Even if general trusting is disabled, trust system-built closures in these packages.
  if (holder->is_in_package("java/lang/invoke") || holder->is_in_package("sun/invoke") ||
      holder->is_in_package("jdk/internal/foreign") || holder->is_in_package("jdk/incubator/foreign") ||
      holder->is_in_package("jdk/internal/vm/vector") || holder->is_in_package("jdk/incubator/vector") ||
      holder->is_in_package("java/lang"))
    return true;
  // Trust hidden classes and VM unsafe anonymous classes. They are created via

@ -1353,6 +1353,11 @@ bool ciMethod::is_unboxing_method() const {
  return false;
}

bool ciMethod::is_vector_method() const {
  return (holder() == ciEnv::current()->vector_VectorSupport_klass()) &&
         (intrinsic_id() != vmIntrinsics::_none);
}

BCEscapeAnalyzer* ciMethod::get_bcea() {
#ifdef COMPILER2
  if (_bcea == NULL) {

@ -356,6 +356,7 @@ class ciMethod : public ciMetadata {
  bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
  bool is_boxing_method() const;
  bool is_unboxing_method() const;
  bool is_vector_method() const;
  bool is_object_initializer() const;

  bool can_be_statically_bound(ciInstanceKlass* context) const;

@ -4539,6 +4539,30 @@ void java_util_concurrent_locks_AbstractOwnableSynchronizer::serialize_offsets(S
}
#endif

int vector_VectorPayload::_payload_offset;

#define VECTORPAYLOAD_FIELDS_DO(macro) \
  macro(_payload_offset, k, "payload", object_signature, false)

void vector_VectorPayload::compute_offsets() {
  InstanceKlass* k = SystemDictionary::vector_VectorPayload_klass();
  VECTORPAYLOAD_FIELDS_DO(FIELD_COMPUTE_OFFSET);
}

#if INCLUDE_CDS
void vector_VectorPayload::serialize_offsets(SerializeClosure* f) {
  VECTORPAYLOAD_FIELDS_DO(FIELD_SERIALIZE_OFFSET);
}
#endif

void vector_VectorPayload::set_payload(oop o, oop val) {
  o->obj_field_put(_payload_offset, val);
}

bool vector_VectorPayload::is_instance(oop obj) {
  return obj != NULL && is_subclass(obj->klass());
}

int java_lang_Integer_IntegerCache::_static_cache_offset;
int java_lang_Long_LongCache::_static_cache_offset;
int java_lang_Character_CharacterCache::_static_cache_offset;

@ -76,6 +76,7 @@ class RecordComponent;
  f(java_util_concurrent_locks_AbstractOwnableSynchronizer) \
  f(jdk_internal_misc_UnsafeConstants) \
  f(java_lang_boxing_object) \
  f(vector_VectorPayload) \
  //end

#define BASIC_JAVA_CLASSES_DO(f) \
@ -1564,6 +1565,24 @@ class jdk_internal_misc_UnsafeConstants : AllStatic {
  static void serialize_offsets(SerializeClosure* f) { }
};

// Interface to jdk.internal.vm.vector.VectorSupport.VectorPayload objects

class vector_VectorPayload : AllStatic {
 private:
  static int _payload_offset;
 public:
  static void set_payload(oop o, oop val);

  static void compute_offsets();
  static void serialize_offsets(SerializeClosure* f) NOT_CDS_RETURN;

  // Testers
  static bool is_subclass(Klass* klass) {
    return klass->is_subclass_of(SystemDictionary::vector_VectorPayload_klass());
  }
  static bool is_instance(oop obj);
};

class java_lang_Integer : AllStatic {
 public:
  static jint value(oop obj);

@ -43,6 +43,7 @@
#include "memory/metaspaceShared.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/jniHandles.inline.hpp"
@ -452,6 +453,24 @@ void Modules::define_module(jobject module, jboolean is_open, jstring version,
  if (h_loader.is_null() && !ClassLoader::has_jrt_entry()) {
    ClassLoader::add_to_exploded_build_list(module_symbol, CHECK);
  }

#ifdef COMPILER2
  // Special handling of jdk.incubator.vector
  if (strcmp(module_name, "jdk.incubator.vector") == 0) {
    if (FLAG_IS_DEFAULT(EnableVectorSupport)) {
      FLAG_SET_DEFAULT(EnableVectorSupport, true);
    }
    if (EnableVectorSupport && FLAG_IS_DEFAULT(EnableVectorReboxing)) {
      FLAG_SET_DEFAULT(EnableVectorReboxing, true);
    }
    if (EnableVectorSupport && EnableVectorReboxing && FLAG_IS_DEFAULT(EnableVectorAggressiveReboxing)) {
      FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, true);
    }
    log_info(compilation)("EnableVectorSupport=%s", (EnableVectorSupport ? "true" : "false"));
    log_info(compilation)("EnableVectorReboxing=%s", (EnableVectorReboxing ? "true" : "false"));
    log_info(compilation)("EnableVectorAggressiveReboxing=%s", (EnableVectorAggressiveReboxing ? "true" : "false"));
  }
#endif // COMPILER2
}

#if INCLUDE_CDS_JAVA_HEAP

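The hunk above means that simply resolving jdk.incubator.vector at startup flips the experimental C2 flags on, unless they were set explicitly on the command line. A minimal sketch of a program that exercises this path (the class name and array size are illustrative, not part of this commit; run with --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorSpecies;

    public class VectorHello {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        public static void main(String[] args) {
            float[] a = new float[1024], b = new float[1024], c = new float[1024];
            // Defining jdk.incubator.vector on the boot layer is what makes
            // Modules::define_module above default EnableVectorSupport to true.
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(SPECIES, b, i);
                va.add(vb).intoArray(c, i);
            }
        }
    }

The log_info(compilation) lines above should then report the effective flag values when the VM runs with compilation logging enabled (e.g. -Xlog:compilation).
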
@ -226,6 +226,13 @@ class TableStatistics;
  /* support for records */ \
  do_klass(RecordComponent_klass, java_lang_reflect_RecordComponent ) \
  \
  /* support for vectors */ \
  do_klass(vector_VectorSupport_klass, jdk_internal_vm_vector_VectorSupport ) \
  do_klass(vector_VectorPayload_klass, jdk_internal_vm_vector_VectorPayload ) \
  do_klass(vector_Vector_klass, jdk_internal_vm_vector_Vector ) \
  do_klass(vector_VectorMask_klass, jdk_internal_vm_vector_VectorMask ) \
  do_klass(vector_VectorShuffle_klass, jdk_internal_vm_vector_VectorShuffle ) \
  \
  /*end*/

class SystemDictionary : AllStatic {

@ -778,6 +778,122 @@
  do_intrinsic(_getAndSetReference, jdk_internal_misc_Unsafe, getAndSetReference_name, getAndSetReference_signature, F_R) \
   do_name( getAndSetReference_name, "getAndSetReference") \
   do_signature(getAndSetReference_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
  \
  /* Vector API intrinsification support */ \
  \
  do_intrinsic(_VectorUnaryOp, jdk_internal_vm_vector_VectorSupport, vector_unary_op_name, vector_unary_op_sig, F_S) \
   do_signature(vector_unary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/util/function/Function;)Ljava/lang/Object;") \
   do_name(vector_unary_op_name, "unaryOp") \
  \
  do_intrinsic(_VectorBinaryOp, jdk_internal_vm_vector_VectorSupport, vector_binary_op_name, vector_binary_op_sig, F_S) \
   do_signature(vector_binary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
                                      "Ljava/util/function/BiFunction;)Ljava/lang/Object;") \
   do_name(vector_binary_op_name, "binaryOp") \
  \
  do_intrinsic(_VectorTernaryOp, jdk_internal_vm_vector_VectorSupport, vector_ternary_op_name, vector_ternary_op_sig, F_S) \
   do_signature(vector_ternary_op_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \
                                       "Ljava/lang/Object;Ljdk/internal/vm/vector/VectorSupport$TernaryOperation;)Ljava/lang/Object;") \
   do_name(vector_ternary_op_name, "ternaryOp") \
  \
  do_intrinsic(_VectorBroadcastCoerced, jdk_internal_vm_vector_VectorSupport, vector_broadcast_coerced_name, vector_broadcast_coerced_sig, F_S)\
   do_signature(vector_broadcast_coerced_sig, "(Ljava/lang/Class;Ljava/lang/Class;IJLjdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
                                              "Ljdk/internal/vm/vector/VectorSupport$BroadcastOperation;)Ljava/lang/Object;") \
   do_name(vector_broadcast_coerced_name, "broadcastCoerced") \
  \
  do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \
   do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
                                              "IIIILjdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
   do_name(vector_shuffle_step_iota_name, "shuffleIota") \
  \
  do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \
   do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
                                              "ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)Ljava/lang/Object;") \
   do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
  \
  do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
   do_signature(vector_load_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjava/lang/Object;" \
                                    "ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljava/lang/Object;") \
   do_name(vector_load_op_name, "load") \
  \
  do_intrinsic(_VectorStoreOp, jdk_internal_vm_vector_VectorSupport, vector_store_op_name, vector_store_op_sig, F_S) \
   do_signature(vector_store_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \
                                     "Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \
   do_name(vector_store_op_name, "store") \
  \
  do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S) \
   do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljava/util/function/Function;)J") \
   do_name(vector_reduction_coerced_name, "reductionCoerced") \
  \
  do_intrinsic(_VectorTest, jdk_internal_vm_vector_VectorSupport, vector_test_name, vector_test_sig, F_S) \
   do_signature(vector_test_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;Ljava/util/function/BiFunction;)Z") \
   do_name(vector_test_name, "test") \
  \
  do_intrinsic(_VectorBlend, jdk_internal_vm_vector_VectorSupport, vector_blend_name, vector_blend_sig, F_S) \
   do_signature(vector_blend_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
                                  "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
                                  "Ljdk/internal/vm/vector/VectorSupport$VectorBlendOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
   do_name(vector_blend_name, "blend") \
  \
  do_intrinsic(_VectorCompare, jdk_internal_vm_vector_VectorSupport, vector_compare_name, vector_compare_sig, F_S) \
   do_signature(vector_compare_sig, "(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
                                    "Ljdk/internal/vm/vector/VectorSupport$Vector;" "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
                                    "Ljdk/internal/vm/vector/VectorSupport$VectorCompareOp;" ")" "Ljdk/internal/vm/vector/VectorSupport$VectorMask;") \
   do_name(vector_compare_name, "compare") \
  \
  do_intrinsic(_VectorRearrange, jdk_internal_vm_vector_VectorSupport, vector_rearrange_name, vector_rearrange_sig, F_S) \
   do_signature(vector_rearrange_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;I" \
                                      "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
                                      "Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
   do_name(vector_rearrange_name, "rearrangeOp") \
  \
  do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
   do_signature(vector_extract_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
                                    "Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
                                    "Ljdk/internal/vm/vector/VectorSupport$VecExtractOp;)J") \
   do_name(vector_extract_name, "extract") \
  \
  do_intrinsic(_VectorInsert, jdk_internal_vm_vector_VectorSupport, vector_insert_name, vector_insert_sig, F_S) \
   do_signature(vector_insert_sig, "(Ljava/lang/Class;Ljava/lang/Class;I" \
                                   "Ljdk/internal/vm/vector/VectorSupport$Vector;IJ" \
                                   "Ljdk/internal/vm/vector/VectorSupport$VecInsertOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
   do_name(vector_insert_name, "insert") \
  \
  do_intrinsic(_VectorBroadcastInt, jdk_internal_vm_vector_VectorSupport, vector_broadcast_int_name, vector_broadcast_int_sig, F_S) \
   do_signature(vector_broadcast_int_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
                                          "Ljdk/internal/vm/vector/VectorSupport$Vector;I" \
                                          "Ljdk/internal/vm/vector/VectorSupport$VectorBroadcastIntOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;") \
   do_name(vector_broadcast_int_name, "broadcastInt") \
  \
  do_intrinsic(_VectorConvert, jdk_internal_vm_vector_VectorSupport, vector_convert_name, vector_convert_sig, F_S) \
   do_signature(vector_convert_sig, "(ILjava/lang/Class;Ljava/lang/Class;I" \
                                    "Ljava/lang/Class;Ljava/lang/Class;I" \
                                    "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;" \
                                    "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
                                    "Ljdk/internal/vm/vector/VectorSupport$VectorConvertOp;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \
   do_name(vector_convert_name, "convert") \
  \
  do_intrinsic(_VectorGatherOp, jdk_internal_vm_vector_VectorSupport, vector_gather_name, vector_gather_sig, F_S) \
   do_signature(vector_gather_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
                                   "Ljava/lang/Object;J" \
                                   "Ljdk/internal/vm/vector/VectorSupport$Vector;" \
                                   "Ljava/lang/Object;I[II" \
                                   "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \
                                   "Ljdk/internal/vm/vector/VectorSupport$LoadVectorOperationWithMap;)" \
                                   "Ljdk/internal/vm/vector/VectorSupport$Vector;") \
   do_name(vector_gather_name, "loadWithMap") \
  \
  do_intrinsic(_VectorScatterOp, jdk_internal_vm_vector_VectorSupport, vector_scatter_name, vector_scatter_sig, F_S) \
   do_signature(vector_scatter_sig, "(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;" \
                                    "Ljava/lang/Object;J" \
                                    "Ljdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$Vector;" \
                                    "Ljava/lang/Object;I[II" \
                                    "Ljdk/internal/vm/vector/VectorSupport$StoreVectorOperationWithMap;)V") \
   do_name(vector_scatter_name, "storeWithMap") \
  \
  do_intrinsic(_VectorRebox, jdk_internal_vm_vector_VectorSupport, vector_rebox_name, vector_rebox_sig, F_S) \
   do_alias(vector_rebox_sig, object_object_signature) \
   do_name(vector_rebox_name, "maybeRebox") \
  \
  \
  /* (2) Bytecode intrinsics */ \
  \

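Each do_intrinsic entry above names a static entry point on jdk.internal.vm.vector.VectorSupport that the public jdk.incubator.vector classes funnel into. A minimal sketch of user code whose operations are intended to reach these intrinsics (the mapping in the comments is the expected one when C2 compiles the loop with EnableVectorSupport; the names come from the public API, not this commit):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorSpecies;

    public class LanewiseAdd {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        static void add(int[] a, int[] b, int[] r) {
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                IntVector va = IntVector.fromArray(SPECIES, a, i); // VectorSupport.load  -> _VectorLoadOp
                IntVector vb = IntVector.fromArray(SPECIES, b, i); // VectorSupport.load  -> _VectorLoadOp
                va.add(vb)                                         // VectorSupport.binaryOp -> _VectorBinaryOp
                  .intoArray(r, i);                                // VectorSupport.store -> _VectorStoreOp
            }
        }
    }
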
@ -81,6 +81,16 @@
  template(java_lang_Integer_IntegerCache, "java/lang/Integer$IntegerCache") \
  template(java_lang_Long, "java/lang/Long") \
  template(java_lang_Long_LongCache, "java/lang/Long$LongCache") \
  \
  template(jdk_internal_vm_vector_VectorSupport, "jdk/internal/vm/vector/VectorSupport") \
  template(jdk_internal_vm_vector_VectorPayload, "jdk/internal/vm/vector/VectorSupport$VectorPayload") \
  template(jdk_internal_vm_vector_Vector, "jdk/internal/vm/vector/VectorSupport$Vector") \
  template(jdk_internal_vm_vector_VectorMask, "jdk/internal/vm/vector/VectorSupport$VectorMask") \
  template(jdk_internal_vm_vector_VectorShuffle, "jdk/internal/vm/vector/VectorSupport$VectorShuffle") \
  template(payload_name, "payload") \
  template(ETYPE_name, "ETYPE") \
  template(VLENGTH_name, "VLENGTH") \
  \
  template(java_lang_Shutdown, "java/lang/Shutdown") \
  template(java_lang_ref_Reference, "java/lang/ref/Reference") \
  template(java_lang_ref_SoftReference, "java/lang/ref/SoftReference") \
@ -768,7 +778,7 @@ class vmIntrinsics: AllStatic {
#undef VM_INTRINSIC_ENUM

    ID_LIMIT,
    LAST_COMPILER_INLINE = _getAndSetReference,
    LAST_COMPILER_INLINE = _VectorScatterOp,
    FIRST_MH_SIG_POLY = _invokeGeneric,
    FIRST_MH_STATIC = _linkToVirtual,
    LAST_MH_SIG_POLY = _linkToInterface,

@ -42,6 +42,7 @@
// - ConstantValue describes a constant

class ConstantOopReadValue;
class LocationValue;
class ObjectValue;

class ScopeValue: public ResourceObj {
@ -67,6 +68,11 @@ class ScopeValue: public ResourceObj {
    return (ObjectValue*)this;
  }

  LocationValue* as_LocationValue() {
    assert(is_location(), "must be");
    return (LocationValue*)this;
  }

  // Serialization of debugging information
  virtual void write_on(DebugInfoWriteStream* stream) = 0;
  static ScopeValue* read_from(DebugInfoReadStream* stream);

@ -58,6 +58,7 @@ class Location {
    lng,           // Long held in one register
    float_in_dbl,  // Float held in double register
    dbl,           // Double held in one register
    vector,        // Vector in one register
    addr,          // JSR return address
    narrowoop      // Narrow Oop (please GC me!)
  };

@ -281,6 +281,30 @@ public:
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
};

//------------------------------MaxLNode---------------------------------------
// MAXimum of 2 longs.
class MaxLNode : public MaxNode {
public:
  MaxLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
  virtual int Opcode() const;
  virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
  virtual const Type *add_id() const { return TypeLong::make(min_jlong); }
  virtual const Type *bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
};

//------------------------------MinLNode---------------------------------------
// MINimum of 2 longs.
class MinLNode : public MaxNode {
public:
  MinLNode(Node *in1, Node *in2) : MaxNode(in1, in2) {}
  virtual int Opcode() const;
  virtual const Type *add_ring(const Type*, const Type*) const { return TypeLong::LONG; }
  virtual const Type *add_id() const { return TypeLong::make(max_jlong); }
  virtual const Type *bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
};

//------------------------------MaxFNode---------------------------------------
// Maximum of 2 floats.
class MaxFNode : public MaxNode {

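add_id() supplies the identity element used when these commutative nodes are reassociated: min_jlong for MaxL and max_jlong for MinL, mirroring the algebra at the Java level. A one-line sanity check of those identities (illustrative only, not from this commit):

    // max(x, Long.MIN_VALUE) == x and min(x, Long.MAX_VALUE) == x for every x.
    static boolean identitiesHold(long x) {
        return Math.max(x, Long.MIN_VALUE) == x && Math.min(x, Long.MAX_VALUE) == x;
    }
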
@ -743,6 +743,15 @@
  product(bool, UseMontgomerySquareIntrinsic, false, DIAGNOSTIC, \
          "Enables intrinsification of BigInteger.montgomerySquare()") \
  \
  product(bool, EnableVectorSupport, false, EXPERIMENTAL, \
          "Enables VectorSupport intrinsics") \
  \
  product(bool, EnableVectorReboxing, false, EXPERIMENTAL, \
          "Enables reboxing of vectors") \
  \
  product(bool, EnableVectorAggressiveReboxing, false, EXPERIMENTAL, \
          "Enables aggressive reboxing of vectors") \
  \
  product(bool, UseTypeSpeculation, true, \
          "Speculatively propagate types from profiles") \
  \

@ -649,6 +649,28 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
  case vmIntrinsics::_isCompileConstant:
  case vmIntrinsics::_Preconditions_checkIndex:
    break;

  case vmIntrinsics::_VectorUnaryOp:
  case vmIntrinsics::_VectorBinaryOp:
  case vmIntrinsics::_VectorTernaryOp:
  case vmIntrinsics::_VectorBroadcastCoerced:
  case vmIntrinsics::_VectorShuffleIota:
  case vmIntrinsics::_VectorShuffleToVector:
  case vmIntrinsics::_VectorLoadOp:
  case vmIntrinsics::_VectorStoreOp:
  case vmIntrinsics::_VectorGatherOp:
  case vmIntrinsics::_VectorScatterOp:
  case vmIntrinsics::_VectorReductionCoerced:
  case vmIntrinsics::_VectorTest:
  case vmIntrinsics::_VectorBlend:
  case vmIntrinsics::_VectorRearrange:
  case vmIntrinsics::_VectorCompare:
  case vmIntrinsics::_VectorBroadcastInt:
  case vmIntrinsics::_VectorConvert:
  case vmIntrinsics::_VectorInsert:
  case vmIntrinsics::_VectorExtract:
    return EnableVectorSupport;

  default:
    return false;
  }

@ -536,7 +536,7 @@ class LateInlineStringCallGenerator : public LateInlineCallGenerator {

    C->add_string_late_inline(this);

    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
    return new_jvms;
  }

@ -560,7 +560,7 @@ class LateInlineBoxingCallGenerator : public LateInlineCallGenerator {

    C->add_boxing_late_inline(this);

    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
    return new_jvms;
  }
};
@ -569,6 +569,28 @@ CallGenerator* CallGenerator::for_boxing_late_inline(ciMethod* method, CallGener
  return new LateInlineBoxingCallGenerator(method, inline_cg);
}

class LateInlineVectorReboxingCallGenerator : public LateInlineCallGenerator {

 public:
  LateInlineVectorReboxingCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
    LateInlineCallGenerator(method, inline_cg, /*is_pure=*/true) {}

  virtual JVMState* generate(JVMState* jvms) {
    Compile *C = Compile::current();

    C->log_inline_id(this);

    C->add_vector_reboxing_late_inline(this);

    JVMState* new_jvms = DirectCallGenerator::generate(jvms);
    return new_jvms;
  }
};

//   static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
CallGenerator* CallGenerator::for_vector_reboxing_late_inline(ciMethod* method, CallGenerator* inline_cg) {
  return new LateInlineVectorReboxingCallGenerator(method, inline_cg);
}
//---------------------------WarmCallGenerator--------------------------------
// Internal class which handles initial deferral of inlining decisions.
class WarmCallGenerator : public CallGenerator {

@ -127,6 +127,7 @@ class CallGenerator : public ResourceObj {
  static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
  static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
  static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
  static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);

  // How to make a call but defer the decision whether to inline or not.
  static CallGenerator* for_warm_call(WarmCallInfo* ci,

@ -290,9 +290,17 @@ Node* CheckCastPPNode::Identity(PhaseGVN* phase) {
  if (_carry_dependency) {
    return this;
  }
  // Toned down to rescue meeting at a Phi 3 different oops all implementing
  // the same interface.
  return (phase->type(in(1)) == phase->type(this)) ? in(1) : this;
  const Type* t = phase->type(in(1));
  if (EnableVectorReboxing && in(1)->Opcode() == Op_VectorBox) {
    if (t->higher_equal_speculative(phase->type(this))) {
      return in(1);
    }
  } else if (t == phase->type(this)) {
    // Toned down to rescue meeting at a Phi 3 different oops all implementing
    // the same interface.
    return in(1);
  }
  return this;
}

//------------------------------Value------------------------------------------

@ -43,6 +43,7 @@
#include "opto/regmask.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
#include "utilities/vmError.hpp"

// Portions of code courtesy of Clifford Click
@ -2387,6 +2388,47 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  }
#endif

  // Phi (VB ... VB) => VB (Phi ...) (Phi ...)
  if (EnableVectorReboxing && can_reshape && progress == NULL) {
    PhaseIterGVN* igvn = phase->is_IterGVN();

    bool all_inputs_are_equiv_vboxes = true;
    for (uint i = 1; i < req(); ++i) {
      Node* n = in(i);
      if (in(i)->Opcode() != Op_VectorBox) {
        all_inputs_are_equiv_vboxes = false;
        break;
      }
      // Check that vector type of vboxes is equivalent
      if (i != 1) {
        if (Type::cmp(in(i-0)->in(VectorBoxNode::Value)->bottom_type(),
                      in(i-1)->in(VectorBoxNode::Value)->bottom_type()) != 0) {
          all_inputs_are_equiv_vboxes = false;
          break;
        }
        if (Type::cmp(in(i-0)->in(VectorBoxNode::Box)->bottom_type(),
                      in(i-1)->in(VectorBoxNode::Box)->bottom_type()) != 0) {
          all_inputs_are_equiv_vboxes = false;
          break;
        }
      }
    }

    if (all_inputs_are_equiv_vboxes) {
      VectorBoxNode* vbox = static_cast<VectorBoxNode*>(in(1));
      PhiNode* new_vbox_phi = new PhiNode(r, vbox->box_type());
      PhiNode* new_vect_phi = new PhiNode(r, vbox->vec_type());
      for (uint i = 1; i < req(); ++i) {
        VectorBoxNode* old_vbox = static_cast<VectorBoxNode*>(in(i));
        new_vbox_phi->set_req(i, old_vbox->in(VectorBoxNode::Box));
        new_vect_phi->set_req(i, old_vbox->in(VectorBoxNode::Value));
      }
      igvn->register_new_node_with_optimizer(new_vbox_phi, this);
      igvn->register_new_node_with_optimizer(new_vect_phi, this);
      progress = new VectorBoxNode(igvn->C, new_vbox_phi, new_vect_phi, vbox->box_type(), vbox->vec_type());
    }
  }

  return progress;              // Return any progress
}

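At the Java level the Phi-of-VectorBox pattern arises whenever two branches each produce a vector and the results merge. A minimal sketch of code that produces it (class and method names are illustrative, not from this commit):

    import jdk.incubator.vector.FloatVector;

    public class PhiRebox {
        // Both arms box a vector; the boxes meet at a Phi in the ideal graph.
        // With EnableVectorReboxing the transform above rewrites
        // Phi(VectorBox, VectorBox) into VectorBox(Phi, Phi), keeping the
        // payload in a vector register instead of materializing two boxes.
        static FloatVector pick(FloatVector a, FloatVector b, boolean flag) {
            return flag ? a.add(b) : a.mul(b);
        }
    }
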
@ -198,9 +198,10 @@ macro(LoopLimit)
macro(Mach)
macro(MachProj)
macro(MulAddS2I)
macro(MaxI)
macro(MaxL)
macro(MaxD)
macro(MaxF)
macro(MaxI)
macro(MemBarAcquire)
macro(LoadFence)
macro(SetVectMaskI)
@ -212,9 +213,10 @@ macro(MemBarReleaseLock)
macro(MemBarVolatile)
macro(MemBarStoreStore)
macro(MergeMem)
macro(MinD)
macro(MinF)
macro(MinI)
macro(MinL)
macro(MinF)
macro(MinD)
macro(ModD)
macro(ModF)
macro(ModI)
@ -229,6 +231,8 @@ macro(MulHiL)
macro(MulI)
macro(MulL)
macro(Multi)
macro(NegI)
macro(NegL)
macro(NegD)
macro(NegF)
macro(NeverBranch)
@ -324,6 +328,8 @@ macro(TailJump)
macro(MacroLogicV)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftB)
macro(URShiftS)
macro(URShiftI)
macro(URShiftL)
macro(XorI)
@ -366,6 +372,7 @@ macro(AbsVI)
macro(AbsVL)
macro(AbsVF)
macro(AbsVD)
macro(NegVI)
macro(NegVF)
macro(NegVD)
macro(SqrtVD)
@ -395,7 +402,9 @@ macro(MaxV)
macro(MinReductionV)
macro(MaxReductionV)
macro(LoadVector)
macro(LoadVectorGather)
macro(StoreVector)
macro(StoreVectorScatter)
macro(Pack)
macro(PackB)
macro(PackS)
@ -424,3 +433,24 @@ macro(Digit)
macro(LowerCase)
macro(UpperCase)
macro(Whitespace)
macro(VectorBox)
macro(VectorBoxAllocate)
macro(VectorUnbox)
macro(VectorMaskWrapper)
macro(VectorMaskCmp)
macro(VectorTest)
macro(VectorBlend)
macro(VectorRearrange)
macro(VectorLoadMask)
macro(VectorLoadShuffle)
macro(VectorLoadConst)
macro(VectorStoreMask)
macro(VectorReinterpret)
macro(VectorCast)
macro(VectorCastB2X)
macro(VectorCastS2X)
macro(VectorCastI2X)
macro(VectorCastL2X)
macro(VectorCastF2X)
macro(VectorCastD2X)
macro(VectorInsert)

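A single source-level expression can fan out into several of the new opcodes. A sketch of a masked operation and the nodes it is expected to lower to (assuming C2 intrinsifies both calls; the mapping in the comments is the intended one, not verified per platform):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class BlendExample {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        static IntVector clampToZero(IntVector v) {
            VectorMask<Integer> neg = v.compare(VectorOperators.LT, 0); // VectorMaskCmp
            return v.blend(IntVector.zero(SPECIES), neg);               // VectorBlend
        }
    }
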
@ -68,6 +68,7 @@
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vector.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/globals_extension.hpp"
@ -412,6 +413,7 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) {
  remove_useless_late_inlines(&_string_late_inlines, useful);
  remove_useless_late_inlines(&_boxing_late_inlines, useful);
  remove_useless_late_inlines(&_late_inlines, useful);
  remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}
@ -545,6 +547,7 @@ Compile::Compile( ciEnv* ci_env, ciMethod* target, int osr_bci,
  _late_inlines(comp_arena(), 2, 0, NULL),
  _string_late_inlines(comp_arena(), 2, 0, NULL),
  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
  _vector_reboxing_late_inlines(comp_arena(), 2, 0, NULL),
  _late_inlines_pos(0),
  _number_of_mh_late_inlines(0),
  _print_inlining_stream(NULL),
@ -1962,6 +1965,8 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {

    inline_incrementally_cleanup(igvn);

    print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);

    if (failing())  return;
  }
  assert( igvn._worklist.size() == 0, "should be done with igvn" );
@ -2096,6 +2101,16 @@ void Compile::Optimize() {
  // so keep only the actual candidates for optimizations.
  cleanup_expensive_nodes(igvn);

  assert(EnableVectorSupport || !has_vbox_nodes(), "sanity");
  if (EnableVectorSupport && has_vbox_nodes()) {
    TracePhase tp("", &timers[_t_vector]);
    PhaseVector pv(igvn);
    pv.optimize_vector_boxes();

    print_method(PHASE_ITER_GVN_AFTER_VECTOR, 2);
  }
  assert(!has_vbox_nodes(), "sanity");

  if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
    Compile::TracePhase tp("", &timers[_t_renumberLive]);
    initial_gvn()->replace_with(&igvn);
@ -2272,6 +2287,35 @@ void Compile::Optimize() {
  DEBUG_ONLY(set_phase_optimize_finished();)
}

void Compile::inline_vector_reboxing_calls() {
  if (C->_vector_reboxing_late_inlines.length() > 0) {
    PhaseGVN* gvn = C->initial_gvn();

    _late_inlines_pos = C->_late_inlines.length();
    while (_vector_reboxing_late_inlines.length() > 0) {
      CallGenerator* cg = _vector_reboxing_late_inlines.pop();
      cg->do_late_inline();
      if (failing())  return;
      print_method(PHASE_INLINE_VECTOR_REBOX, cg->call_node());
    }
    _vector_reboxing_late_inlines.trunc_to(0);
  }
}

bool Compile::has_vbox_nodes() {
  if (C->_vector_reboxing_late_inlines.length() > 0) {
    return true;
  }
  for (int macro_idx = C->macro_count() - 1; macro_idx >= 0; macro_idx--) {
    Node * n = C->macro_node(macro_idx);
    assert(n->is_macro(), "only macro nodes expected here");
    if (n->Opcode() == Op_VectorUnbox || n->Opcode() == Op_VectorBox || n->Opcode() == Op_VectorBoxAllocate) {
      return true;
    }
  }
  return false;
}

//---------------------------- Bitwise operation packing optimization ---------------------------

static bool is_vector_unary_bitwise_op(Node* n) {
@ -2618,8 +2662,8 @@ void Compile::Code_Gen() {
    if (failing()) {
      return;
    }
    print_method(PHASE_AFTER_MATCHING, 3);
  }

  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )
@ -2798,7 +2842,8 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
  // Check for commutative opcode
  switch( nop ) {
  case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
  case Op_MaxI:  case Op_MinI:
  case Op_MaxI:  case Op_MaxL:  case Op_MaxF:  case Op_MaxD:
  case Op_MinI:  case Op_MinL:  case Op_MinF:  case Op_MinD:
  case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
  case Op_AndL:  case Op_XorL:  case Op_OrL:
  case Op_AndI:  case Op_XorI:  case Op_OrI: {
@ -3348,6 +3393,8 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f

  case Op_LoadVector:
  case Op_StoreVector:
  case Op_LoadVectorGather:
  case Op_StoreVectorScatter:
    break;

  case Op_AddReductionVI:
@ -4568,26 +4615,43 @@ void Compile::sort_macro_nodes() {
  }
}

void Compile::print_method(CompilerPhaseType cpt, int level, int idx) {
void Compile::print_method(CompilerPhaseType cpt, const char *name, int level, int idx) {
  EventCompilerPhase event;
  if (event.should_commit()) {
    CompilerEvent::PhaseEvent::post(event, C->_latest_stage_start_counter, cpt, C->_compile_id, level);
  }

#ifndef PRODUCT
  if (should_print(level)) {
    char output[1024];
    if (idx != 0) {
      jio_snprintf(output, sizeof(output), "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx);
    } else {
      jio_snprintf(output, sizeof(output), "%s", CompilerPhaseTypeHelper::to_string(cpt));
    }
    _printer->print_method(output, level);
    _printer->print_method(name, level);
  }
#endif
  C->_latest_stage_start_counter.stamp();
}

void Compile::print_method(CompilerPhaseType cpt, int level, int idx) {
  char output[1024];
#ifndef PRODUCT
  if (idx != 0) {
    jio_snprintf(output, sizeof(output), "%s:%d", CompilerPhaseTypeHelper::to_string(cpt), idx);
  } else {
    jio_snprintf(output, sizeof(output), "%s", CompilerPhaseTypeHelper::to_string(cpt));
  }
#endif
  print_method(cpt, output, level, idx);
}

void Compile::print_method(CompilerPhaseType cpt, Node* n, int level) {
  ResourceMark rm;
  stringStream ss;
  ss.print_raw(CompilerPhaseTypeHelper::to_string(cpt));
  if (n != NULL) {
    ss.print(": %d %s ", n->_idx, NodeClassNames[n->Opcode()]);
  } else {
    ss.print_raw(": NULL");
  }
  C->print_method(cpt, ss.as_string(), level);
}

void Compile::end_method(int level) {
  EventCompilerPhase event;
  if (event.should_commit()) {

@ -382,6 +382,8 @@ class Compile : public Phase {

  GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations

  GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations

  int _late_inlines_pos;       // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
  uint _number_of_mh_late_inlines; // number of method handle late inlining still pending

@ -644,7 +646,9 @@ class Compile : public Phase {
#endif
  }

  void print_method(CompilerPhaseType cpt, const char *name, int level = 1, int idx = 0);
  void print_method(CompilerPhaseType cpt, int level = 1, int idx = 0);
  void print_method(CompilerPhaseType cpt, Node* n, int level = 3);

#ifndef PRODUCT
  void igv_print_method_to_file(const char* phase_name = "Debug", bool append = false);
@ -865,10 +869,13 @@ class Compile : public Phase {
                          bool allow_intrinsics = true);
  bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
    return should_delay_string_inlining(call_method, jvms) ||
           should_delay_boxing_inlining(call_method, jvms);
           should_delay_boxing_inlining(call_method, jvms) ||
           should_delay_vector_inlining(call_method, jvms);
  }
  bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
  bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
  bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms);
  bool should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms);

  // Helper functions to identify inlining potential at call-site
  ciMethod* optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
@ -940,6 +947,10 @@ class Compile : public Phase {
    _boxing_late_inlines.push(cg);
  }

  void add_vector_reboxing_late_inline(CallGenerator* cg) {
    _vector_reboxing_late_inlines.push(cg);
  }

  void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);

  void process_print_inlining();
@ -969,6 +980,9 @@ class Compile : public Phase {
  bool optimize_loops(PhaseIterGVN& igvn, LoopOptsMode mode);
  void remove_root_to_sfpts_edges(PhaseIterGVN& igvn);

  void inline_vector_reboxing_calls();
  bool has_vbox_nodes();

  // Matching, CFG layout, allocation, code generation
  PhaseCFG* cfg() { return _cfg; }
  bool has_java_calls() const { return _java_calls > 0; }

@ -135,6 +135,8 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
      if (cg->does_virtual_dispatch()) {
        cg_intrinsic = cg;
        cg = NULL;
      } else if (should_delay_vector_inlining(callee, jvms)) {
        return CallGenerator::for_late_inline(callee, cg);
      } else {
        return cg;
      }
@ -185,6 +187,8 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
        return CallGenerator::for_string_late_inline(callee, cg);
      } else if (should_delay_boxing_inlining(callee, jvms)) {
        return CallGenerator::for_boxing_late_inline(callee, cg);
      } else if (should_delay_vector_reboxing_inlining(callee, jvms)) {
        return CallGenerator::for_vector_reboxing_late_inline(callee, cg);
      } else if ((should_delay || AlwaysIncrementalInline)) {
        return CallGenerator::for_late_inline(callee, cg);
      }
@ -422,6 +426,14 @@ bool Compile::should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms
  return false;
}

bool Compile::should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms) {
  return EnableVectorSupport && call_method->is_vector_method();
}

bool Compile::should_delay_vector_reboxing_inlining(ciMethod* call_method, JVMState* jvms) {
  return EnableVectorSupport && (call_method->intrinsic_id() == vmIntrinsics::_VectorRebox);
}

// uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
  // Additional inputs to consider...

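should_delay_vector_inlining defers every call into VectorSupport so it stays a call node until the box-elimination pass has seen the whole graph. For orientation, a hypothetical sketch of what such an entry point looks like on the Java side, reconstructed from the vector_binary_op_sig descriptor earlier in this commit (parameter names and the fallback body are illustrative, not the JDK source; the annotation marking it as an intrinsic candidate is elided):

    import java.util.function.BiFunction;

    public class VectorSupportSketch {
        // C2 replaces this call with vector IR when the intrinsic applies;
        // otherwise the interpreter or baseline compiler runs the fallback.
        public static Object binaryOp(int oprId, Class<?> vectorClass, Class<?> elementType,
                                      int length, Object v1, Object v2,
                                      BiFunction<Object, Object, Object> defaultImpl) {
            return defaultImpl.apply(v1, v2);
        }
    }
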
@ -686,6 +686,7 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_
|
||||
case Op_StoreP:
|
||||
case Op_StoreN:
|
||||
case Op_StoreVector:
|
||||
case Op_StoreVectorScatter:
|
||||
case Op_StoreNKlass:
|
||||
for (uint k = 1; k < m->req(); k++) {
|
||||
Node *in = m->in(k);
|
||||
|
||||
@ -37,15 +37,13 @@
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/c2compiler.hpp"
|
||||
#include "opto/callGenerator.hpp"
|
||||
#include "opto/castnode.hpp"
|
||||
#include "opto/cfgnode.hpp"
|
||||
#include "opto/convertnode.hpp"
|
||||
#include "opto/countbitsnode.hpp"
|
||||
#include "opto/intrinsicnode.hpp"
|
||||
#include "opto/idealKit.hpp"
|
||||
#include "opto/library_call.hpp"
|
||||
#include "opto/mathexactnode.hpp"
|
||||
#include "opto/movenode.hpp"
|
||||
#include "opto/mulnode.hpp"
|
||||
#include "opto/narrowptrnode.hpp"
|
||||
#include "opto/opaquenode.hpp"
|
||||
@ -60,291 +58,6 @@
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
class LibraryIntrinsic : public InlineCallGenerator {
|
||||
// Extend the set of intrinsics known to the runtime:
|
||||
public:
|
||||
private:
|
||||
bool _is_virtual;
|
||||
bool _does_virtual_dispatch;
|
||||
int8_t _predicates_count; // Intrinsic is predicated by several conditions
|
||||
int8_t _last_predicate; // Last generated predicate
|
||||
vmIntrinsics::ID _intrinsic_id;
|
||||
|
||||
public:
|
||||
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
|
||||
: InlineCallGenerator(m),
|
||||
_is_virtual(is_virtual),
|
||||
_does_virtual_dispatch(does_virtual_dispatch),
|
||||
_predicates_count((int8_t)predicates_count),
|
||||
_last_predicate((int8_t)-1),
|
||||
_intrinsic_id(id)
|
||||
{
|
||||
}
|
||||
virtual bool is_intrinsic() const { return true; }
|
||||
virtual bool is_virtual() const { return _is_virtual; }
|
||||
virtual bool is_predicated() const { return _predicates_count > 0; }
|
||||
virtual int predicates_count() const { return _predicates_count; }
|
||||
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
|
||||
virtual JVMState* generate(JVMState* jvms);
|
||||
virtual Node* generate_predicate(JVMState* jvms, int predicate);
|
||||
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
|
||||
};
|
||||
|
||||
|
||||
// Local helper class for LibraryIntrinsic:
|
||||
class LibraryCallKit : public GraphKit {
|
||||
private:
|
||||
LibraryIntrinsic* _intrinsic; // the library intrinsic being called
|
||||
Node* _result; // the result node, if any
|
||||
int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted
|
||||
|
||||
const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type);
|
||||
|
||||
public:
|
||||
LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
|
||||
: GraphKit(jvms),
|
||||
_intrinsic(intrinsic),
|
||||
_result(NULL)
|
||||
{
|
||||
// Check if this is a root compile. In that case we don't have a caller.
|
||||
if (!jvms->has_method()) {
|
||||
_reexecute_sp = sp();
|
||||
} else {
|
||||
// Find out how many arguments the interpreter needs when deoptimizing
|
||||
// and save the stack pointer value so it can used by uncommon_trap.
|
||||
// We find the argument count by looking at the declared signature.
|
||||
bool ignored_will_link;
|
||||
ciSignature* declared_signature = NULL;
|
||||
ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
|
||||
const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
|
||||
_reexecute_sp = sp() + nargs; // "push" arguments back on stack
|
||||
}
|
||||
}
|
||||
|
||||
virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }
|
||||
|
||||
ciMethod* caller() const { return jvms()->method(); }
|
||||
int bci() const { return jvms()->bci(); }
|
||||
LibraryIntrinsic* intrinsic() const { return _intrinsic; }
|
||||
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
|
||||
ciMethod* callee() const { return _intrinsic->method(); }
|
||||
|
||||
bool try_to_inline(int predicate);
|
||||
Node* try_to_predicate(int predicate);
|
||||
|
||||
void push_result() {
|
||||
// Push the result onto the stack.
|
||||
if (!stopped() && result() != NULL) {
|
||||
BasicType bt = result()->bottom_type()->basic_type();
|
||||
push_node(bt, result());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void fatal_unexpected_iid(vmIntrinsics::ID iid) {
|
||||
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
|
||||
}
|
||||
|
||||
void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
|
||||
void set_result(RegionNode* region, PhiNode* value);
|
||||
Node* result() { return _result; }
|
||||
|
||||
virtual int reexecute_sp() { return _reexecute_sp; }
|
||||
|
||||
// Helper functions to inline natives
|
||||
Node* generate_guard(Node* test, RegionNode* region, float true_prob);
|
||||
Node* generate_slow_guard(Node* test, RegionNode* region);
|
||||
Node* generate_fair_guard(Node* test, RegionNode* region);
|
||||
Node* generate_negative_guard(Node* index, RegionNode* region,
|
||||
// resulting CastII of index:
|
||||
Node* *pos_index = NULL);
|
||||
Node* generate_limit_guard(Node* offset, Node* subseq_length,
|
||||
Node* array_length,
|
||||
RegionNode* region);
|
||||
void generate_string_range_check(Node* array, Node* offset,
|
||||
Node* length, bool char_count);
|
||||
Node* generate_current_thread(Node* &tls_output);
|
||||
Node* load_mirror_from_klass(Node* klass);
|
||||
Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
|
||||
RegionNode* region, int null_path,
|
||||
int offset);
|
||||
Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
|
||||
RegionNode* region, int null_path) {
|
||||
int offset = java_lang_Class::klass_offset();
|
||||
return load_klass_from_mirror_common(mirror, never_see_null,
|
||||
region, null_path,
|
||||
offset);
|
||||
}
|
||||
Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
|
||||
RegionNode* region, int null_path) {
|
||||
int offset = java_lang_Class::array_klass_offset();
|
||||
return load_klass_from_mirror_common(mirror, never_see_null,
|
||||
region, null_path,
|
||||
offset);
|
||||
}
|
||||
Node* generate_access_flags_guard(Node* kls,
|
||||
int modifier_mask, int modifier_bits,
|
||||
RegionNode* region);
|
||||
Node* generate_interface_guard(Node* kls, RegionNode* region);
|
||||
Node* generate_hidden_class_guard(Node* kls, RegionNode* region);
|
||||
Node* generate_array_guard(Node* kls, RegionNode* region) {
|
||||
return generate_array_guard_common(kls, region, false, false);
|
||||
}
|
||||
Node* generate_non_array_guard(Node* kls, RegionNode* region) {
|
||||
return generate_array_guard_common(kls, region, false, true);
|
||||
}
|
||||
Node* generate_objArray_guard(Node* kls, RegionNode* region) {
|
||||
return generate_array_guard_common(kls, region, true, false);
|
||||
}
|
||||
Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
|
||||
return generate_array_guard_common(kls, region, true, true);
|
||||
}
|
||||
Node* generate_array_guard_common(Node* kls, RegionNode* region,
|
||||
bool obj_array, bool not_array);
|
||||
Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
|
||||
CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
|
||||
bool is_virtual = false, bool is_static = false);
|
||||
CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
|
||||
return generate_method_call(method_id, false, true);
|
||||
}
|
||||
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
|
||||
return generate_method_call(method_id, true, false);
|
||||
}
|
||||
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
|
||||
Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2, StrIntrinsicNode::ArgEnc ae);
bool inline_string_compareTo(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOf(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae);
Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count,
RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae);
bool inline_string_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_string_toBytesU();
bool inline_string_getCharsU();
bool inline_string_copy(bool compress);
bool inline_string_char_access(bool is_store);
Node* round_double_node(Node* n);
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
bool inline_double_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
bool inline_math_addExactI(bool is_increment);
bool inline_math_addExactL(bool is_increment);
bool inline_math_multiplyExactI();
bool inline_math_multiplyExactL();
bool inline_math_multiplyHigh();
bool inline_math_negateExactI();
bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type = T_ILLEGAL, bool can_cast = false);

typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate();
bool inline_unsafe_newArray(bool uninitialized);
bool inline_unsafe_writeback0();
bool inline_unsafe_writebackSync0(bool is_pre);
bool inline_unsafe_copyMemory();
bool inline_native_currentThread();

bool inline_native_time_funcs(address method, const char* funcName);
#ifdef JFR_HAVE_INTRINSICS
bool inline_native_classID();
bool inline_native_getEventWriter();
#endif
bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_preconditions_checkIndex();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method
bool inline_native_hashcode(bool is_virtual, bool is_static);
bool inline_native_getClass();

// Helper functions for inlining arraycopy
bool inline_arraycopy();
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp,
uint new_idx);

typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
bool inline_Class_cast();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
bool inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id);
bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* inline_electronicCodeBook_AESCrypt_predicate(bool decrypting);
Node* inline_counterMode_AESCrypt_predicate();
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_base64_encodeBlock();
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_digest_object(Node *digestBase_object);
Node* get_long_state_from_digest_object(Node *digestBase_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
bool inline_updateBytesCRC32C();
bool inline_updateDirectByteBufferCRC32C();
bool inline_updateBytesAdler32();
bool inline_updateByteBufferAdler32();
bool inline_multiplyToLen();
bool inline_hasNegatives();
bool inline_squareToLen();
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_bigIntegerShift(bool isRightShift);
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);

bool inline_profileBoolean();
bool inline_isCompileConstant();
void clear_upper_avx() {
#ifdef X86
if (UseAVX >= 2) {
C->set_clear_upper_avx(true);
}
#endif
}
};

//---------------------------make_vm_intrinsic----------------------------
CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
vmIntrinsics::ID id = m->intrinsic_id();

@ -453,6 +166,7 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
}
C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
C->print_inlining_update(this);

return NULL;
}

@ -530,7 +244,6 @@ bool LibraryCallKit::try_to_inline(int predicate) {
}
assert(merged_memory(), "");

switch (intrinsic_id()) {
case vmIntrinsics::_hashCode: return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
case vmIntrinsics::_identityHashCode: return inline_native_hashcode(/*!virtual*/ false, is_static);

@ -912,6 +625,45 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_minD:
return inline_fp_min_max(intrinsic_id());

case vmIntrinsics::_VectorUnaryOp:
return inline_vector_nary_operation(1);
case vmIntrinsics::_VectorBinaryOp:
return inline_vector_nary_operation(2);
case vmIntrinsics::_VectorTernaryOp:
return inline_vector_nary_operation(3);
case vmIntrinsics::_VectorBroadcastCoerced:
return inline_vector_broadcast_coerced();
case vmIntrinsics::_VectorShuffleIota:
return inline_vector_shuffle_iota();
case vmIntrinsics::_VectorShuffleToVector:
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorStoreOp:
return inline_vector_mem_operation(/*is_store=*/true);
case vmIntrinsics::_VectorGatherOp:
return inline_vector_gather_scatter(/*is_scatter*/ false);
case vmIntrinsics::_VectorScatterOp:
return inline_vector_gather_scatter(/*is_scatter*/ true);
case vmIntrinsics::_VectorReductionCoerced:
return inline_vector_reduction();
case vmIntrinsics::_VectorTest:
return inline_vector_test();
case vmIntrinsics::_VectorBlend:
return inline_vector_blend();
case vmIntrinsics::_VectorRearrange:
return inline_vector_rearrange();
case vmIntrinsics::_VectorCompare:
return inline_vector_compare();
case vmIntrinsics::_VectorBroadcastInt:
return inline_vector_broadcast_int();
case vmIntrinsics::_VectorConvert:
return inline_vector_convert();
case vmIntrinsics::_VectorInsert:
return inline_vector_insert();
case vmIntrinsics::_VectorExtract:
return inline_vector_extract();

default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
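For context, the new _Vector* intrinsic IDs dispatched above are reached from Java code written against the jdk.incubator.vector API that this commit introduces. A minimal sketch of such code (the class and array names are illustrative, not from the commit):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorSpecies;

    class VectorAddSketch {
        // Species matching the widest vector shape the CPU supports.
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        // c[i] = a[i] + b[i]: fromArray/intoArray are backed by _VectorLoadOp
        // and _VectorStoreOp, and the lanewise add by _VectorBinaryOp, i.e.
        // inline_vector_nary_operation(2) above.
        static void add(float[] a, float[] b, float[] c) {
            int i = 0;
            for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(SPECIES, b, i);
                va.add(vb).intoArray(c, i);
            }
            for (; i < a.length; i++) { // scalar tail
                c[i] = a[i] + b[i];
            }
        }
    }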
@ -2255,7 +2007,7 @@ LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type)
}
}

inline Node* LibraryCallKit::make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type, bool can_cast) {
Node* LibraryCallKit::make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type, bool can_cast) {
Node* uncasted_base = base;
int kind = classify_unsafe_addr(uncasted_base, offset, type);
if (kind == Type::RawPtr) {

src/hotspot/share/opto/library_call.hpp (new file, 348 lines)
@ -0,0 +1,348 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "ci/ciMethod.hpp"
#include "classfile/javaClasses.hpp"
#include "opto/callGenerator.hpp"
#include "opto/graphKit.hpp"
#include "opto/castnode.hpp"
#include "opto/convertnode.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/movenode.hpp"

class LibraryIntrinsic : public InlineCallGenerator {
// Extend the set of intrinsics known to the runtime:
public:
private:
bool _is_virtual;
bool _does_virtual_dispatch;
int8_t _predicates_count; // Intrinsic is predicated by several conditions
int8_t _last_predicate; // Last generated predicate
vmIntrinsics::ID _intrinsic_id;

public:
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
_does_virtual_dispatch(does_virtual_dispatch),
_predicates_count((int8_t)predicates_count),
_last_predicate((int8_t)-1),
_intrinsic_id(id)
{
}
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
virtual bool is_predicated() const { return _predicates_count > 0; }
virtual int predicates_count() const { return _predicates_count; }
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
virtual JVMState* generate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms, int predicate);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};


// Local helper class for LibraryIntrinsic:
class LibraryCallKit : public GraphKit {
private:
LibraryIntrinsic* _intrinsic; // the library intrinsic being called
Node* _result; // the result node, if any
int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted

const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type);

public:
LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
: GraphKit(jvms),
_intrinsic(intrinsic),
_result(NULL)
{
// Check if this is a root compile. In that case we don't have a caller.
if (!jvms->has_method()) {
_reexecute_sp = sp();
} else {
// Find out how many arguments the interpreter needs when deoptimizing
// and save the stack pointer value so it can used by uncommon_trap.
// We find the argument count by looking at the declared signature.
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
_reexecute_sp = sp() + nargs; // "push" arguments back on stack
}
}

virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }

ciMethod* caller() const { return jvms()->method(); }
int bci() const { return jvms()->bci(); }
LibraryIntrinsic* intrinsic() const { return _intrinsic; }
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
ciMethod* callee() const { return _intrinsic->method(); }

bool try_to_inline(int predicate);
Node* try_to_predicate(int predicate);

void push_result() {
// Push the result onto the stack.
if (!stopped() && result() != NULL) {
BasicType bt = result()->bottom_type()->basic_type();
push_node(bt, result());
}
}

private:
void fatal_unexpected_iid(vmIntrinsics::ID iid) {
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
}

void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
void set_result(RegionNode* region, PhiNode* value);
Node* result() { return _result; }

virtual int reexecute_sp() { return _reexecute_sp; }

// Helper functions to inline natives
Node* generate_guard(Node* test, RegionNode* region, float true_prob);
Node* generate_slow_guard(Node* test, RegionNode* region);
Node* generate_fair_guard(Node* test, RegionNode* region);
Node* generate_negative_guard(Node* index, RegionNode* region,
// resulting CastII of index:
Node* *pos_index = NULL);
Node* generate_limit_guard(Node* offset, Node* subseq_length,
Node* array_length,
RegionNode* region);
void generate_string_range_check(Node* array, Node* offset,
Node* length, bool char_count);
Node* generate_current_thread(Node* &tls_output);
Node* load_mirror_from_klass(Node* klass);
Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
RegionNode* region, int null_path,
int offset);
Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::array_klass_offset();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* generate_access_flags_guard(Node* kls,
int modifier_mask, int modifier_bits,
RegionNode* region);
Node* generate_interface_guard(Node* kls, RegionNode* region);
Node* generate_hidden_class_guard(Node* kls, RegionNode* region);
Node* generate_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, false);
}
Node* generate_non_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, true);
}
Node* generate_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, false);
}
Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, true);
}
Node* generate_array_guard_common(Node* kls, RegionNode* region,
bool obj_array, bool not_array);
Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
bool is_virtual = false, bool is_static = false);
CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, false, true);
}
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);

Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2, StrIntrinsicNode::ArgEnc ae);
bool inline_string_compareTo(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOf(StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae);
Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count,
RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae);
bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae);
bool inline_string_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_string_toBytesU();
bool inline_string_getCharsU();
bool inline_string_copy(bool compress);
bool inline_string_char_access(bool is_store);
Node* round_double_node(Node* n);
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
bool inline_double_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
bool inline_math_addExactI(bool is_increment);
bool inline_math_addExactL(bool is_increment);
bool inline_math_multiplyExactI();
bool inline_math_multiplyExactL();
bool inline_math_multiplyHigh();
bool inline_math_negateExactI();
bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement);
bool inline_min_max(vmIntrinsics::ID id);
bool inline_notify(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, DecoratorSet decorators, BasicType type = T_ILLEGAL, bool can_cast = false);

typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate();
bool inline_unsafe_newArray(bool uninitialized);
bool inline_unsafe_writeback0();
bool inline_unsafe_writebackSync0(bool is_pre);
bool inline_unsafe_copyMemory();
bool inline_native_currentThread();

bool inline_native_time_funcs(address method, const char* funcName);
#ifdef JFR_HAVE_INTRINSICS
bool inline_native_classID();
bool inline_native_getEventWriter();
#endif
bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_preconditions_checkIndex();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method
bool inline_native_hashcode(bool is_virtual, bool is_static);
bool inline_native_getClass();

// Helper functions for inlining arraycopy
bool inline_arraycopy();
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp,
uint new_idx);

typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
bool inline_Class_cast();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
bool inline_electronicCodeBook_AESCrypt(vmIntrinsics::ID id);
bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* inline_electronicCodeBook_AESCrypt_predicate(bool decrypting);
Node* inline_counterMode_AESCrypt_predicate();
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_base64_encodeBlock();
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_digest_object(Node *digestBase_object);
Node* get_long_state_from_digest_object(Node *digestBase_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
bool inline_updateBytesCRC32C();
bool inline_updateDirectByteBufferCRC32C();
bool inline_updateBytesAdler32();
bool inline_updateByteBufferAdler32();
bool inline_multiplyToLen();
bool inline_hasNegatives();
bool inline_squareToLen();
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_bigIntegerShift(bool isRightShift);
bool inline_vectorizedMismatch();
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);

bool inline_profileBoolean();
bool inline_isCompileConstant();

// Vector API support
bool inline_vector_nary_operation(int n);
bool inline_vector_broadcast_coerced();
bool inline_vector_shuffle_to_vector();
bool inline_vector_shuffle_iota();
bool inline_vector_mem_operation(bool is_store);
bool inline_vector_gather_scatter(bool is_scatter);
bool inline_vector_reduction();
bool inline_vector_test();
bool inline_vector_blend();
bool inline_vector_rearrange();
bool inline_vector_compare();
bool inline_vector_broadcast_int();
bool inline_vector_convert();
bool inline_vector_extract();
bool inline_vector_insert();
Node* box_vector(Node* in, const TypeInstPtr* vbox_type, BasicType bt, int num_elem);
Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType bt, int num_elem, bool shuffle_to_vector = false);
Node* shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem);

enum VectorMaskUseType {
VecMaskUseLoad,
VecMaskUseStore,
VecMaskUseAll,
VecMaskNotUsed
};

bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false);

void clear_upper_avx() {
#ifdef X86
if (UseAVX >= 2) {
C->set_clear_upper_avx(true);
}
#endif
}
};
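As a companion to the "Vector API support" declarations above, a hedged Java sketch of code that eventually reaches the compare/blend/reduction inliners (the class and method names below are illustrative, not part of the commit):

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    class VectorClampSumSketch {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        // Sums max(a[i], 0): compare() is backed by inline_vector_compare(),
        // blend() by inline_vector_blend(), and reduceLanes() by
        // inline_vector_reduction().
        static float sumOfNonNegative(float[] a) {
            float sum = 0.0f;
            FloatVector zero = FloatVector.zero(SPECIES);
            int i = 0;
            for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(SPECIES, a, i);
                VectorMask<Float> neg = va.compare(VectorOperators.LT, zero);
                sum += va.blend(zero, neg).reduceLanes(VectorOperators.ADD);
            }
            for (; i < a.length; i++) { // scalar tail
                sum += Math.max(a[i], 0.0f);
            }
            return sum;
        }
    }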
@ -430,7 +430,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}

#define NOF_STACK_MASKS (3*6+6)
#define NOF_STACK_MASKS (3*12)

// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the

@ -473,6 +473,20 @@ void Matcher::init_first_stack_mask() {
idealreg2spillmask [Op_VecY] = &rms[22];
idealreg2spillmask [Op_VecZ] = &rms[23];

idealreg2debugmask [Op_VecA] = &rms[24];
idealreg2debugmask [Op_VecS] = &rms[25];
idealreg2debugmask [Op_VecD] = &rms[26];
idealreg2debugmask [Op_VecX] = &rms[27];
idealreg2debugmask [Op_VecY] = &rms[28];
idealreg2debugmask [Op_VecZ] = &rms[29];

idealreg2mhdebugmask[Op_VecA] = &rms[30];
idealreg2mhdebugmask[Op_VecS] = &rms[31];
idealreg2mhdebugmask[Op_VecD] = &rms[32];
idealreg2mhdebugmask[Op_VecX] = &rms[33];
idealreg2mhdebugmask[Op_VecY] = &rms[34];
idealreg2mhdebugmask[Op_VecZ] = &rms[35];

OptoReg::Name i;

// At first, start with the empty mask

@ -520,13 +534,19 @@ void Matcher::init_first_stack_mask() {
if (Matcher::vector_size_supported(T_BYTE,4)) {
*idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
} else {
*idealreg2spillmask[Op_VecS] = RegMask::Empty;
}

if (Matcher::vector_size_supported(T_FLOAT,2)) {
// For VecD we need dual alignment and 8 bytes (2 slots) for spills.
// RA guarantees such alignment since it is needed for Double and Long values.
*idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecD] = RegMask::Empty;
}

if (Matcher::vector_size_supported(T_FLOAT,4)) {
// For VecX we need quadro alignment and 16 bytes (4 slots) for spills.
//

@ -544,7 +564,10 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecX] = RegMask::Empty;
}

if (Matcher::vector_size_supported(T_FLOAT,8)) {
// For VecY we need octo alignment and 32 bytes (8 slots) for spills.
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);

@ -556,7 +579,10 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecY] = RegMask::Empty;
}

if (Matcher::vector_size_supported(T_FLOAT,16)) {
// For VecZ we need enough alignment and 64 bytes (16 slots) for spills.
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);

@ -568,6 +594,8 @@ void Matcher::init_first_stack_mask() {
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
} else {
*idealreg2spillmask[Op_VecZ] = RegMask::Empty;
}

if (Matcher::supports_scalable_vector()) {

@ -622,6 +650,13 @@ void Matcher::init_first_stack_mask() {
*idealreg2debugmask [Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2debugmask [Op_RegP] = *idealreg2spillmask[Op_RegP];

*idealreg2debugmask [Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2debugmask [Op_VecS] = *idealreg2spillmask[Op_VecS];
*idealreg2debugmask [Op_VecD] = *idealreg2spillmask[Op_VecD];
*idealreg2debugmask [Op_VecX] = *idealreg2spillmask[Op_VecX];
*idealreg2debugmask [Op_VecY] = *idealreg2spillmask[Op_VecY];
*idealreg2debugmask [Op_VecZ] = *idealreg2spillmask[Op_VecZ];

*idealreg2mhdebugmask[Op_RegN] = *idealreg2spillmask[Op_RegN];
*idealreg2mhdebugmask[Op_RegI] = *idealreg2spillmask[Op_RegI];
*idealreg2mhdebugmask[Op_RegL] = *idealreg2spillmask[Op_RegL];

@ -629,6 +664,13 @@ void Matcher::init_first_stack_mask() {
*idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP];

*idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS];
*idealreg2mhdebugmask[Op_VecD] = *idealreg2spillmask[Op_VecD];
*idealreg2mhdebugmask[Op_VecX] = *idealreg2spillmask[Op_VecX];
*idealreg2mhdebugmask[Op_VecY] = *idealreg2spillmask[Op_VecY];
*idealreg2mhdebugmask[Op_VecZ] = *idealreg2spillmask[Op_VecZ];

// Prevent stub compilations from attempting to reference
// callee-saved (SOE) registers from debug info
bool exclude_soe = !Compile::current()->is_method_compilation();

@ -642,12 +684,26 @@ void Matcher::init_first_stack_mask() {
idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask);

idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecD]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecX]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecY]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecZ]->SUBTRACT(*caller_save_mask);

idealreg2mhdebugmask[Op_RegN]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegI]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegL]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegF]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask);

idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecD]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecX]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecY]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecZ]->SUBTRACT(*mh_caller_save_mask);
}

//---------------------------is_save_on_entry----------------------------------

@ -1953,7 +2009,6 @@ bool Matcher::is_vshift_con_pattern(Node *n, Node *m) {
return false;
}


bool Matcher::clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
// Must clone all producers of flags, or we will not match correctly.
// Suppose a compare setting int-flags is shared (e.g., a switch-tree)

@ -2308,8 +2363,28 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_VectorBlend:
case Op_VectorInsert: {
Node* pair = new BinaryNode(n->in(1), n->in(2));
n->set_req(1, pair);
n->set_req(2, n->in(3));
n->del_req(3);
break;
}
case Op_StoreVectorScatter: {
Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1));
n->set_req(MemNode::ValueIn, pair);
n->del_req(MemNode::ValueIn+1);
break;
}
case Op_VectorMaskCmp: {
n->set_req(1, new BinaryNode(n->in(1), n->in(2)));
n->set_req(2, n->in(3));
n->del_req(3);
break;
}
default:
break;
}
}

@ -345,6 +345,9 @@ public:
// Vector ideal reg
static const uint vector_ideal_reg(int len);

// Does the CPU supports vector variable shift instructions?
static bool supports_vector_variable_shifts(void);

// CPU supports misaligned vectors store/load.
static const bool misaligned_vectors_ok();

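The supports_vector_variable_shifts() query above distinguishes shifts by one scalar count, which go through _VectorBroadcastInt (inline_vector_broadcast_int()), from per-lane shift counts, which need hardware support. A hedged Java sketch of the two shapes (the class and method names are illustrative, not from the commit):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;

    class VectorShiftSketch {
        // Shift every lane by the same scalar count; lowered via
        // _VectorBroadcastInt.
        static IntVector shiftAll(IntVector v, int n) {
            return v.lanewise(VectorOperators.LSHL, n);
        }

        // Per-lane shift counts; only vectorized when the matcher reports
        // supports_vector_variable_shifts().
        static IntVector shiftPerLane(IntVector v, IntVector counts) {
            return v.lanewise(VectorOperators.LSHL, counts);
        }
    }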
@ -641,7 +641,8 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) {
}

if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
const int MAX_STORE = MAX2(BytesPerLong, (int)MaxVectorSize);
assert(mem->as_Store()->memory_size() <= MAX_STORE, "");
if (st_offset >= offset + size_in_bytes ||
st_offset <= offset - MAX_STORE ||
st_offset <= offset - mem->as_Store()->memory_size()) {

@ -1111,11 +1112,16 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
// (This is one of the few places where a generic PhaseTransform
// can create new nodes. Think of it as lazily manifesting
// virtually pre-existing constants.)
if (ReduceBulkZeroing || find_array_copy_clone(phase, ld_alloc, in(MemNode::Memory)) == NULL) {
// If ReduceBulkZeroing is disabled, we need to check if the allocation does not belong to an
// ArrayCopyNode clone. If it does, then we cannot assume zero since the initialization is done
// by the ArrayCopyNode.
return phase->zerocon(memory_type());
if (memory_type() != T_VOID) {
if (ReduceBulkZeroing || find_array_copy_clone(phase, ld_alloc, in(MemNode::Memory)) == NULL) {
// If ReduceBulkZeroing is disabled, we need to check if the allocation does not belong to an
// ArrayCopyNode clone. If it does, then we cannot assume zero since the initialization is done
// by the ArrayCopyNode.
return phase->zerocon(memory_type());
}
} else {
// TODO: materialize all-zero vector constant
assert(!isa_Load() || as_Load()->type()->isa_vect(), "");
}
}

@ -2561,6 +2567,8 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
assert(Opcode() == st->Opcode() ||
st->Opcode() == Op_StoreVector ||
Opcode() == Op_StoreVector ||
st->Opcode() == Op_StoreVectorScatter ||
Opcode() == Op_StoreVectorScatter ||
phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
(Opcode() == Op_StoreL && st->Opcode() == Op_StoreI) || // expanded ClearArrayNode
(Opcode() == Op_StoreI && st->Opcode() == Op_StoreL) || // initialization by arraycopy

@ -3744,7 +3752,7 @@ intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseGVN* phase, bool
int InitializeNode::captured_store_insertion_point(intptr_t start,
int size_in_bytes,
PhaseTransform* phase) {
const int FAIL = 0, MAX_STORE = BytesPerLong;
const int FAIL = 0, MAX_STORE = MAX2(BytesPerLong, (int)MaxVectorSize);

if (is_complete())
return FAIL; // arraycopy got here first; punt

@ -3774,6 +3782,7 @@ int InitializeNode::captured_store_insertion_point(intptr_t start,
}
return -(int)i; // not found; here is where to put it
} else if (st_off < start) {
assert(st->as_Store()->memory_size() <= MAX_STORE, "");
if (size_in_bytes != 0 &&
start < st_off + MAX_STORE &&
start < st_off + st->as_Store()->memory_size()) {

@ -363,6 +363,14 @@ const Type* MoveL2DNode::Value(PhaseGVN* phase) const {
return TypeD::make( v.get_jdouble() );
}

//------------------------------Identity----------------------------------------
Node* MoveL2DNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveD2L) {
return in(1)->in(1);
}
return this;
}

//------------------------------Value------------------------------------------
const Type* MoveI2FNode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );

@ -374,6 +382,14 @@ const Type* MoveI2FNode::Value(PhaseGVN* phase) const {
return TypeF::make( v.get_jfloat() );
}

//------------------------------Identity----------------------------------------
Node* MoveI2FNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveF2I) {
return in(1)->in(1);
}
return this;
}

//------------------------------Value------------------------------------------
const Type* MoveF2INode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );

@ -385,6 +401,14 @@ const Type* MoveF2INode::Value(PhaseGVN* phase) const {
return TypeInt::make( v.get_jint() );
}

//------------------------------Identity----------------------------------------
Node* MoveF2INode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveI2F) {
return in(1)->in(1);
}
return this;
}

//------------------------------Value------------------------------------------
const Type* MoveD2LNode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );

@ -396,6 +420,14 @@ const Type* MoveD2LNode::Value(PhaseGVN* phase) const {
return TypeLong::make( v.get_jlong() );
}

//------------------------------Identity----------------------------------------
Node* MoveD2LNode::Identity(PhaseGVN* phase) {
if (in(1)->Opcode() == Op_MoveL2D) {
return in(1)->in(1);
}
return this;
}

#ifndef PRODUCT
//----------------------------BinaryNode---------------------------------------
// The set of related nodes for a BinaryNode is all data inputs and all outputs

@ -105,6 +105,7 @@ class MoveI2FNode : public Node {
virtual const Type *bottom_type() const { return Type::FLOAT; }
virtual uint ideal_reg() const { return Op_RegF; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};

class MoveL2DNode : public Node {

@ -114,6 +115,7 @@ class MoveL2DNode : public Node {
virtual const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};

class MoveF2INode : public Node {

@ -123,6 +125,7 @@ class MoveF2INode : public Node {
virtual const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};

class MoveD2LNode : public Node {

@ -132,6 +135,7 @@ class MoveD2LNode : public Node {
virtual const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
virtual const Type* Value(PhaseGVN* phase) const;
virtual Node* Identity(PhaseGVN* phase);
};

//------------------------------BinaryNode-------------------------------------

@ -259,6 +259,25 @@ public:
virtual uint ideal_reg() const { return Op_RegL; }
};

//------------------------------URShiftBNode-----------------------------------
// Logical shift right
class URShiftBNode : public Node {
public:
URShiftBNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
ShouldNotReachHere(); // only vector variant is used
}
virtual int Opcode() const;
};

//------------------------------URShiftSNode-----------------------------------
// Logical shift right
class URShiftSNode : public Node {
public:
URShiftSNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
ShouldNotReachHere(); // only vector variant is used
}
virtual int Opcode() const;
};

//------------------------------URShiftINode-----------------------------------
// Logical shift right

@ -152,7 +152,10 @@ class TypeNode;
class UnlockNode;
class VectorNode;
class LoadVectorNode;
class LoadVectorGatherNode;
class StoreVectorNode;
class StoreVectorScatterNode;
class VectorMaskCmpNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {

@ -688,8 +691,10 @@ public:
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(LoadVectorGather, LoadVector, 0)
DEFINE_CLASS_ID(Store, Mem, 1)
DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(StoreVectorScatter, StoreVector, 0)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0)
DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0)

@ -714,6 +719,7 @@ public:
DEFINE_CLASS_ID(Add, Node, 11)
DEFINE_CLASS_ID(Mul, Node, 12)
DEFINE_CLASS_ID(Vector, Node, 13)
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
DEFINE_CLASS_ID(ClearArray, Node, 14)
DEFINE_CLASS_ID(Halt, Node, 15)
DEFINE_CLASS_ID(Opaque1, Node, 16)

@ -884,7 +890,10 @@ public:
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
DEFINE_CLASS_QUERY(LoadVector)
DEFINE_CLASS_QUERY(LoadVectorGather)
DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(StoreVectorScatter)
DEFINE_CLASS_QUERY(VectorMaskCmp)
DEFINE_CLASS_QUERY(Unlock)

#undef DEFINE_CLASS_QUERY

@ -892,6 +892,10 @@ void PhaseOutput::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
? Location::int_in_long : Location::normal ));
} else if( t->base() == Type::NarrowOop ) {
array->append(new_loc_value( C->regalloc(), regnum, Location::narrowoop ));
} else if ( t->base() == Type::VectorS || t->base() == Type::VectorD ||
t->base() == Type::VectorX || t->base() == Type::VectorY ||
t->base() == Type::VectorZ) {
array->append(new_loc_value( C->regalloc(), regnum, Location::vector ));
} else {
array->append(new_loc_value( C->regalloc(), regnum, C->regalloc()->is_oop(local) ? Location::oop : Location::normal ));
}

@ -78,6 +78,10 @@ void Phase::print_timers() {
}
}
tty->print_cr (" Renumber Live: %7.3f s", timers[_t_renumberLive].seconds());
tty->print_cr (" Vector: %7.3f s", timers[_t_vector].seconds());
tty->print_cr (" Box elimination: %7.3f s", timers[_t_vector_elimination].seconds());
tty->print_cr (" IGVN: %7.3f s", timers[_t_vector_igvn].seconds());
tty->print_cr (" Prune Useless: %7.3f s", timers[_t_vector_pru].seconds());
tty->print_cr (" IdealLoop: %7.3f s", timers[_t_idealLoop].seconds());
tty->print_cr (" IdealLoop Verify: %7.3f s", timers[_t_idealLoopVerify].seconds());
tty->print_cr (" Cond Const Prop: %7.3f s", timers[_t_ccp].seconds());

@ -59,6 +59,7 @@ public:
Ideal_Loop, // Find idealized trip-counted loops
Macro_Expand, // Expand macro nodes
Peephole, // Apply peephole optimizations
Vector,
Output,
last_phase
};

@ -75,6 +76,10 @@ public:
_t_incrInline_igvn,
_t_incrInline_pru,
_t_incrInline_inline,
_t_vector,
_t_vector_elimination,
_t_vector_igvn,
_t_vector_pru,
_t_renumberLive,
_t_idealLoop,
_t_idealLoopVerify,

@ -31,7 +31,14 @@ enum CompilerPhaseType {
PHASE_BEFORE_REMOVEUSELESS,
PHASE_AFTER_PARSING,
PHASE_ITER_GVN1,
PHASE_EXPAND_VUNBOX,
PHASE_SCALARIZE_VBOX,
PHASE_INLINE_VECTOR_REBOX,
PHASE_EXPAND_VBOX,
PHASE_ELIMINATE_VBOX_ALLOC,
PHASE_PHASEIDEAL_BEFORE_EA,
PHASE_ITER_GVN_AFTER_VECTOR,
PHASE_ITER_GVN_BEFORE_EA,
PHASE_ITER_GVN_AFTER_EA,
PHASE_ITER_GVN_AFTER_ELIMINATION,
PHASE_PHASEIDEALLOOP1,

@ -41,6 +48,7 @@ enum CompilerPhaseType {
PHASE_ITER_GVN2,
PHASE_PHASEIDEALLOOP_ITERATIONS,
PHASE_OPTIMIZE_FINISHED,
PHASE_AFTER_MATCHING,
PHASE_GLOBAL_CODE_MOTION,
PHASE_FINAL_CODE,
PHASE_AFTER_EA,

@ -51,6 +59,7 @@ enum CompilerPhaseType {
PHASE_BEFORE_MATCHING,
PHASE_MATCHING,
PHASE_INCREMENTAL_INLINE,
PHASE_INCREMENTAL_INLINE_STEP,
PHASE_INCREMENTAL_BOXING_INLINE,
PHASE_CALL_CATCH_CLEANUP,
PHASE_INSERT_BARRIER,

@ -73,7 +82,14 @@ class CompilerPhaseTypeHelper {
case PHASE_BEFORE_REMOVEUSELESS: return "Before RemoveUseless";
case PHASE_AFTER_PARSING: return "After Parsing";
case PHASE_ITER_GVN1: return "Iter GVN 1";
case PHASE_EXPAND_VUNBOX: return "Expand VectorUnbox";
case PHASE_SCALARIZE_VBOX: return "Scalarize VectorBox";
case PHASE_INLINE_VECTOR_REBOX: return "Inline Vector Rebox Calls";
case PHASE_EXPAND_VBOX: return "Expand VectorBox";
case PHASE_ELIMINATE_VBOX_ALLOC: return "Eliminate VectorBoxAllocate";
case PHASE_PHASEIDEAL_BEFORE_EA: return "PhaseIdealLoop before EA";
case PHASE_ITER_GVN_AFTER_VECTOR: return "Iter GVN after vector box elimination";
case PHASE_ITER_GVN_BEFORE_EA: return "Iter GVN before EA";
case PHASE_ITER_GVN_AFTER_EA: return "Iter GVN after EA";
case PHASE_ITER_GVN_AFTER_ELIMINATION: return "Iter GVN after eliminating allocations and locks";
case PHASE_PHASEIDEALLOOP1: return "PhaseIdealLoop 1";

@ -83,6 +99,7 @@ class CompilerPhaseTypeHelper {
case PHASE_ITER_GVN2: return "Iter GVN 2";
case PHASE_PHASEIDEALLOOP_ITERATIONS: return "PhaseIdealLoop iterations";
case PHASE_OPTIMIZE_FINISHED: return "Optimize finished";
case PHASE_AFTER_MATCHING: return "After Matching";
case PHASE_GLOBAL_CODE_MOTION: return "Global code motion";
case PHASE_FINAL_CODE: return "Final Code";
case PHASE_AFTER_EA: return "After Escape Analysis";

@ -93,6 +110,7 @@ class CompilerPhaseTypeHelper {
case PHASE_BEFORE_MATCHING: return "Before matching";
case PHASE_MATCHING: return "After matching";
case PHASE_INCREMENTAL_INLINE: return "Incremental Inline";
case PHASE_INCREMENTAL_INLINE_STEP: return "Incremental Inline Step";
case PHASE_INCREMENTAL_BOXING_INLINE: return "Incremental Boxing Inline";
case PHASE_CALL_CATCH_CLEANUP: return "Call catch cleanup";
case PHASE_INSERT_BARRIER: return "Insert barrier";

@ -404,6 +404,28 @@ public:
NegNode( Node *in1 ) : Node(0,in1) {}
};

//------------------------------NegINode---------------------------------------
// Negate value an int. For int values, negation is the same as subtraction
// from zero
class NegINode : public NegNode {
public:
NegINode(Node *in1) : NegNode(in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------NegLNode---------------------------------------
// Negate value an int. For int values, negation is the same as subtraction
// from zero
class NegLNode : public NegNode {
public:
NegLNode(Node *in1) : NegNode(in1) {}
virtual int Opcode() const;
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
};

//------------------------------NegFNode---------------------------------------
// Negate value a float. Negating 0.0 returns -0.0, but subtracting from
// zero returns +0.0 (per JVM spec on 'fneg' bytecode). As subtraction

@ -2767,7 +2767,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
}
}
// Move shift count into vector register.
cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
cnt = VectorNode::shift_count(p0->Opcode(), cnt, vlen, velt_basic_type(p0));
_igvn.register_new_node_with_optimizer(cnt);
_phase->set_ctrl(cnt, _phase->get_ctrl(opd));
return cnt;

@ -439,16 +439,22 @@ void Type::Initialize_shared(Compile* current) {
BOTTOM = make(Bottom); // Everything
HALF = make(Half); // Placeholder half of doublewide type

TypeF::MAX = TypeF::make(max_jfloat); // Float MAX
TypeF::MIN = TypeF::make(min_jfloat); // Float MIN
TypeF::ZERO = TypeF::make(0.0); // Float 0 (positive zero)
TypeF::ONE = TypeF::make(1.0); // Float 1
TypeF::POS_INF = TypeF::make(jfloat_cast(POSITIVE_INFINITE_F));
TypeF::NEG_INF = TypeF::make(-jfloat_cast(POSITIVE_INFINITE_F));

TypeD::MAX = TypeD::make(max_jdouble); // Double MAX
TypeD::MIN = TypeD::make(min_jdouble); // Double MIN
TypeD::ZERO = TypeD::make(0.0); // Double 0 (positive zero)
TypeD::ONE = TypeD::make(1.0); // Double 1
TypeD::POS_INF = TypeD::make(jdouble_cast(POSITIVE_INFINITE_D));
TypeD::NEG_INF = TypeD::make(-jdouble_cast(POSITIVE_INFINITE_D));

TypeInt::MAX = TypeInt::make(max_jint); // Int MAX
TypeInt::MIN = TypeInt::make(min_jint); // Int MIN
TypeInt::MINUS_1 = TypeInt::make(-1); // -1
TypeInt::ZERO = TypeInt::make( 0); // 0
TypeInt::ONE = TypeInt::make( 1); // 1

@ -477,6 +483,8 @@ void Type::Initialize_shared(Compile* current) {
assert( TypeInt::CC_GE == TypeInt::BOOL, "types must match for CmpL to work" );
assert( (juint)(TypeInt::CC->_hi - TypeInt::CC->_lo) <= SMALLINT, "CC is truly small");

TypeLong::MAX = TypeLong::make(max_jlong); // Long MAX
TypeLong::MIN = TypeLong::make(min_jlong); // Long MIN
TypeLong::MINUS_1 = TypeLong::make(-1); // -1
TypeLong::ZERO = TypeLong::make( 0); // 0
TypeLong::ONE = TypeLong::make( 1); // 1

@ -1119,6 +1127,8 @@ void Type::typerr( const Type *t ) const {

//=============================================================================
// Convenience common pre-built types.
const TypeF *TypeF::MAX; // Floating point max
const TypeF *TypeF::MIN; // Floating point min
const TypeF *TypeF::ZERO; // Floating point zero
const TypeF *TypeF::ONE; // Floating point one
const TypeF *TypeF::POS_INF; // Floating point positive infinity

@ -1229,6 +1239,8 @@ bool TypeF::empty(void) const {

//=============================================================================
// Convenience common pre-built types.
const TypeD *TypeD::MAX; // Floating point max
const TypeD *TypeD::MIN; // Floating point min
const TypeD *TypeD::ZERO; // Floating point zero
const TypeD *TypeD::ONE; // Floating point one
const TypeD *TypeD::POS_INF; // Floating point positive infinity

@ -1335,6 +1347,8 @@ bool TypeD::empty(void) const {

//=============================================================================
// Convenience common pre-built types.
const TypeInt *TypeInt::MAX; // INT_MAX
const TypeInt *TypeInt::MIN; // INT_MIN
const TypeInt *TypeInt::MINUS_1;// -1
const TypeInt *TypeInt::ZERO; // 0
const TypeInt *TypeInt::ONE; // 1

@ -1604,6 +1618,8 @@ bool TypeInt::empty(void) const {

//=============================================================================
// Convenience common pre-built types.
const TypeLong *TypeLong::MAX;
const TypeLong *TypeLong::MIN;
const TypeLong *TypeLong::MINUS_1;// -1
const TypeLong *TypeLong::ZERO; // 0
const TypeLong *TypeLong::ONE; // 1

@ -483,6 +483,8 @@ public:
virtual const Type *xmeet( const Type *t ) const;
virtual const Type *xdual() const; // Compute dual right now.
// Convenience common pre-built types.
static const TypeF *MAX;
static const TypeF *MIN;
static const TypeF *ZERO; // positive zero only
static const TypeF *ONE;
static const TypeF *POS_INF;

@ -512,6 +514,8 @@ public:
virtual const Type *xmeet( const Type *t ) const;
virtual const Type *xdual() const; // Compute dual right now.
// Convenience common pre-built types.
static const TypeD *MAX;
static const TypeD *MIN;
static const TypeD *ZERO; // positive zero only
static const TypeD *ONE;
static const TypeD *POS_INF;

@ -555,6 +559,8 @@ public:
virtual const Type *narrow( const Type *t ) const;
// Do not kill _widen bits.
// Convenience common pre-built types.
static const TypeInt *MAX;
static const TypeInt *MIN;
static const TypeInt *MINUS_1;
static const TypeInt *ZERO;
static const TypeInt *ONE;

@ -620,6 +626,8 @@ public:
virtual const Type *widen( const Type *t, const Type* limit_type ) const;
virtual const Type *narrow( const Type *t ) const;
// Convenience common pre-built types.
static const TypeLong *MAX;
static const TypeLong *MIN;
static const TypeLong *MINUS_1;
static const TypeLong *ZERO;
static const TypeLong *ONE;

src/hotspot/share/opto/vector.cpp (new file, 466 lines)
@ -0,0 +1,466 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "opto/castnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/vector.hpp"
#include "utilities/macros.hpp"

void PhaseVector::optimize_vector_boxes() {
  Compile::TracePhase tp("vector_elimination", &timers[_t_vector_elimination]);

  // Signal GraphKit it's post-parse phase.
  assert(C->inlining_incrementally() == false, "sanity");
  C->set_inlining_incrementally(true);

  C->for_igvn()->clear();
  C->initial_gvn()->replace_with(&_igvn);

  expand_vunbox_nodes();
  scalarize_vbox_nodes();

  C->inline_vector_reboxing_calls();

  expand_vbox_nodes();
  eliminate_vbox_alloc_nodes();

  C->set_inlining_incrementally(false);

  do_cleanup();
}

void PhaseVector::do_cleanup() {
  if (C->failing()) return;
  {
    Compile::TracePhase tp("vector_pru", &timers[_t_vector_pru]);
    ResourceMark rm;
    PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
    if (C->failing()) return;
  }
  {
    Compile::TracePhase tp("incrementalInline_igvn", &timers[_t_vector_igvn]);
    _igvn = PhaseIterGVN(C->initial_gvn());
    _igvn.optimize();
    if (C->failing()) return;
  }
  C->print_method(PHASE_ITER_GVN_BEFORE_EA, 3);
}

void PhaseVector::scalarize_vbox_nodes() {
  if (C->failing()) return;

  if (!EnableVectorReboxing) {
    return; // don't scalarize vector boxes
  }

  int macro_idx = C->macro_count() - 1;
  while (macro_idx >= 0) {
    Node * n = C->macro_node(macro_idx);
    assert(n->is_macro(), "only macro nodes expected here");
    if (n->Opcode() == Op_VectorBox) {
      VectorBoxNode* vbox = static_cast<VectorBoxNode*>(n);
      scalarize_vbox_node(vbox);
      if (C->failing()) return;
      C->print_method(PHASE_SCALARIZE_VBOX, vbox, 3);
    }
    if (C->failing()) return;
    macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
  }
}

void PhaseVector::expand_vbox_nodes() {
  if (C->failing()) return;

  int macro_idx = C->macro_count() - 1;
  while (macro_idx >= 0) {
    Node * n = C->macro_node(macro_idx);
    assert(n->is_macro(), "only macro nodes expected here");
    if (n->Opcode() == Op_VectorBox) {
      VectorBoxNode* vbox = static_cast<VectorBoxNode*>(n);
      expand_vbox_node(vbox);
      if (C->failing()) return;
    }
    if (C->failing()) return;
    macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
  }
}

void PhaseVector::expand_vunbox_nodes() {
  if (C->failing()) return;

  int macro_idx = C->macro_count() - 1;
  while (macro_idx >= 0) {
    Node * n = C->macro_node(macro_idx);
    assert(n->is_macro(), "only macro nodes expected here");
    if (n->Opcode() == Op_VectorUnbox) {
      VectorUnboxNode* vec_unbox = static_cast<VectorUnboxNode*>(n);
      expand_vunbox_node(vec_unbox);
      if (C->failing()) return;
      C->print_method(PHASE_EXPAND_VUNBOX, vec_unbox, 3);
    }
    if (C->failing()) return;
    macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
  }
}

void PhaseVector::eliminate_vbox_alloc_nodes() {
  if (C->failing()) return;

  int macro_idx = C->macro_count() - 1;
  while (macro_idx >= 0) {
    Node * n = C->macro_node(macro_idx);
    assert(n->is_macro(), "only macro nodes expected here");
    if (n->Opcode() == Op_VectorBoxAllocate) {
      VectorBoxAllocateNode* vbox_alloc = static_cast<VectorBoxAllocateNode*>(n);
      eliminate_vbox_alloc_node(vbox_alloc);
      if (C->failing()) return;
      C->print_method(PHASE_ELIMINATE_VBOX_ALLOC, vbox_alloc, 3);
    }
    if (C->failing()) return;
    macro_idx = MIN2(macro_idx - 1, C->macro_count() - 1);
  }
}

static JVMState* clone_jvms(Compile* C, SafePointNode* sfpt) {
  JVMState* new_jvms = sfpt->jvms()->clone_shallow(C);
  uint size = sfpt->req();
  SafePointNode* map = new SafePointNode(size, new_jvms);
  for (uint i = 0; i < size; i++) {
    map->init_req(i, sfpt->in(i));
  }
  new_jvms->set_map(map);
  return new_jvms;
}

void PhaseVector::scalarize_vbox_node(VectorBoxNode* vec_box) {
  Node* vec_value = vec_box->in(VectorBoxNode::Value);
  PhaseGVN& gvn = *C->initial_gvn();

  // Process merged VBAs

  if (EnableVectorAggressiveReboxing) {
    Unique_Node_List calls(C->comp_arena());
    for (DUIterator_Fast imax, i = vec_box->fast_outs(imax); i < imax; i++) {
      Node* use = vec_box->fast_out(i);
      if (use->is_CallJava()) {
        CallJavaNode* call = use->as_CallJava();
        if (call->has_non_debug_use(vec_box) && vec_box->in(VectorBoxNode::Box)->is_Phi()) {
          calls.push(call);
        }
      }
    }

    while (calls.size() > 0) {
      CallJavaNode* call = calls.pop()->as_CallJava();
      // Attach new VBA to the call and use it instead of Phi (VBA ... VBA).

      JVMState* jvms = clone_jvms(C, call);
      GraphKit kit(jvms);
      PhaseGVN& gvn = kit.gvn();

      // Adjust JVMS from post-call to pre-call state: put args on stack
      uint nargs = call->method()->arg_size();
      kit.ensure_stack(kit.sp() + nargs);
      for (uint i = TypeFunc::Parms; i < call->tf()->domain()->cnt(); i++) {
        kit.push(call->in(i));
      }
      jvms = kit.sync_jvms();

      Node* new_vbox = NULL;
      {
        PreserveReexecuteState prs(&kit);

        kit.jvms()->set_should_reexecute(true);

        const TypeInstPtr* vbox_type = vec_box->box_type();
        const TypeVect* vect_type = vec_box->vec_type();
        Node* vect = vec_box->in(VectorBoxNode::Value);

        VectorBoxAllocateNode* alloc = new VectorBoxAllocateNode(C, vbox_type);
        kit.set_edges_for_java_call(alloc, /*must_throw=*/false, /*separate_io_proj=*/true);
        kit.make_slow_call_ex(alloc, C->env()->Throwable_klass(), /*separate_io_proj=*/true, /*deoptimize=*/true);
        kit.set_i_o(gvn.transform( new ProjNode(alloc, TypeFunc::I_O) ));
        kit.set_all_memory(gvn.transform( new ProjNode(alloc, TypeFunc::Memory) ));
        Node* ret = gvn.transform(new ProjNode(alloc, TypeFunc::Parms));

        new_vbox = gvn.transform(new VectorBoxNode(C, ret, vect, vbox_type, vect_type));

        kit.replace_in_map(vec_box, new_vbox);
      }

      kit.dec_sp(nargs);
      jvms = kit.sync_jvms();

      call->set_req(TypeFunc::Control , kit.control());
      call->set_req(TypeFunc::I_O     , kit.i_o());
      call->set_req(TypeFunc::Memory  , kit.reset_memory());
      call->set_req(TypeFunc::FramePtr, kit.frameptr());
      call->replace_edge(vec_box, new_vbox);

      C->record_for_igvn(call);
    }
  }

  // Process debug uses at safepoints
  Unique_Node_List safepoints(C->comp_arena());

  for (DUIterator_Fast imax, i = vec_box->fast_outs(imax); i < imax; i++) {
    Node* use = vec_box->fast_out(i);
    if (use->is_SafePoint()) {
      SafePointNode* sfpt = use->as_SafePoint();
      if (!sfpt->is_Call() || !sfpt->as_Call()->has_non_debug_use(vec_box)) {
        safepoints.push(sfpt);
      }
    }
  }

  while (safepoints.size() > 0) {
    SafePointNode* sfpt = safepoints.pop()->as_SafePoint();

    uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
    Node* sobj = new SafePointScalarObjectNode(vec_box->box_type(),
#ifdef ASSERT
                                               NULL,
#endif // ASSERT
                                               first_ind, /*n_fields=*/1);
    sobj->init_req(0, C->root());
    sfpt->add_req(vec_value);

    sobj = gvn.transform(sobj);

    JVMState *jvms = sfpt->jvms();

    jvms->set_endoff(sfpt->req());
    // Now make a pass over the debug information replacing any references
    // to the allocated object with "sobj"
    int start = jvms->debug_start();
    int end   = jvms->debug_end();
    sfpt->replace_edges_in_range(vec_box, sobj, start, end);

    C->record_for_igvn(sfpt);
  }
}

void PhaseVector::expand_vbox_node(VectorBoxNode* vec_box) {
  if (vec_box->outcnt() > 0) {
    Node* vbox = vec_box->in(VectorBoxNode::Box);
    Node* vect = vec_box->in(VectorBoxNode::Value);
    Node* result = expand_vbox_node_helper(vbox, vect, vec_box->box_type(), vec_box->vec_type());
    C->gvn_replace_by(vec_box, result);
    C->print_method(PHASE_EXPAND_VBOX, vec_box, 3);
  }
  C->remove_macro_node(vec_box);
}

Node* PhaseVector::expand_vbox_node_helper(Node* vbox,
                                           Node* vect,
                                           const TypeInstPtr* box_type,
                                           const TypeVect* vect_type) {
  if (vbox->is_Phi() && vect->is_Phi()) {
    assert(vbox->as_Phi()->region() == vect->as_Phi()->region(), "");
    Node* new_phi = new PhiNode(vbox->as_Phi()->region(), box_type);
    for (uint i = 1; i < vbox->req(); i++) {
      Node* new_box = expand_vbox_node_helper(vbox->in(i), vect->in(i), box_type, vect_type);
      new_phi->set_req(i, new_box);
    }
    new_phi = C->initial_gvn()->transform(new_phi);
    return new_phi;
  } else if (vbox->is_Proj() && vbox->in(0)->Opcode() == Op_VectorBoxAllocate) {
    VectorBoxAllocateNode* vbox_alloc = static_cast<VectorBoxAllocateNode*>(vbox->in(0));
    return expand_vbox_alloc_node(vbox_alloc, vect, box_type, vect_type);
  } else {
    assert(!vbox->is_Phi(), "");
    // TODO: assert that expanded vbox is initialized with the same value (vect).
    return vbox; // already expanded
  }
}

static bool is_vector_mask(ciKlass* klass) {
  return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
}

static bool is_vector_shuffle(ciKlass* klass) {
  return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
}

Node* PhaseVector::expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc,
                                          Node* value,
                                          const TypeInstPtr* box_type,
                                          const TypeVect* vect_type) {
  JVMState* jvms = clone_jvms(C, vbox_alloc);
  GraphKit kit(jvms);
  PhaseGVN& gvn = kit.gvn();

  ciInstanceKlass* box_klass = box_type->klass()->as_instance_klass();
  BasicType bt = vect_type->element_basic_type();
  int num_elem = vect_type->length();

  bool is_mask = is_vector_mask(box_klass);
  if (is_mask && bt != T_BOOLEAN) {
    value = gvn.transform(VectorStoreMaskNode::make(gvn, value, bt, num_elem));
    // Although type of mask depends on its definition, in terms of storage everything is stored in boolean array.
    bt = T_BOOLEAN;
    assert(value->as_Vector()->bottom_type()->is_vect()->element_basic_type() == bt,
           "must be consistent with mask representation");
  }

  // Generate array allocation for the field which holds the values.
  const TypeKlassPtr* array_klass = TypeKlassPtr::make(ciTypeArrayKlass::make(bt));
  Node* arr = kit.new_array(kit.makecon(array_klass), kit.intcon(num_elem), 1);

  // Store the vector value into the array.
  // (The store should be captured by InitializeNode and turned into initialized store later.)
  Node* arr_adr = kit.array_element_address(arr, kit.intcon(0), bt);
  const TypePtr* arr_adr_type = arr_adr->bottom_type()->is_ptr();
  Node* arr_mem = kit.memory(arr_adr);
  Node* vstore = gvn.transform(StoreVectorNode::make(0,
                                                     kit.control(),
                                                     arr_mem,
                                                     arr_adr,
                                                     arr_adr_type,
                                                     value,
                                                     num_elem));
  kit.set_memory(vstore, arr_adr_type);

  C->set_max_vector_size(MAX2(C->max_vector_size(), vect_type->length_in_bytes()));

  // Generate the allocate for the Vector object.
  const TypeKlassPtr* klass_type = box_type->as_klass_type();
  Node* klass_node = kit.makecon(klass_type);
  Node* vec_obj = kit.new_instance(klass_node);

  // Store the allocated array into object.
  ciField* field = ciEnv::current()->vector_VectorPayload_klass()->get_field_by_name(ciSymbol::payload_name(),
                                                                                     ciSymbol::object_signature(),
                                                                                     false);
  assert(field != NULL, "");
  Node* vec_field = kit.basic_plus_adr(vec_obj, field->offset_in_bytes());
  const TypePtr* vec_adr_type = vec_field->bottom_type()->is_ptr();

  // The store should be captured by InitializeNode and turned into initialized store later.
  Node* field_store = gvn.transform(kit.access_store_at(vec_obj,
                                                        vec_field,
                                                        vec_adr_type,
                                                        arr,
                                                        TypeOopPtr::make_from_klass(field->type()->as_klass()),
                                                        T_OBJECT,
                                                        IN_HEAP));
  kit.set_memory(field_store, vec_adr_type);

  kit.replace_call(vbox_alloc, vec_obj, true);
  C->remove_macro_node(vbox_alloc);

  return vec_obj;
}

void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) {
  if (vec_unbox->outcnt() > 0) {
    GraphKit kit;
    PhaseGVN& gvn = kit.gvn();

    Node* obj = vec_unbox->obj();
    const TypeInstPtr* tinst = gvn.type(obj)->isa_instptr();
    ciInstanceKlass* from_kls = tinst->klass()->as_instance_klass();
    BasicType bt = vec_unbox->vect_type()->element_basic_type();
    BasicType masktype = bt;
    BasicType elem_bt;

    if (is_vector_mask(from_kls)) {
      bt = T_BOOLEAN;
    } else if (is_vector_shuffle(from_kls)) {
      if (vec_unbox->is_shuffle_to_vector() == true) {
        elem_bt = bt;
      }
      bt = T_BYTE;
    }

    ciField* field = ciEnv::current()->vector_VectorPayload_klass()->get_field_by_name(ciSymbol::payload_name(),
                                                                                       ciSymbol::object_signature(),
                                                                                       false);
    assert(field != NULL, "");
    int offset = field->offset_in_bytes();
    Node* vec_adr = kit.basic_plus_adr(obj, offset);

    Node* mem = vec_unbox->mem();
    Node* ctrl = vec_unbox->in(0);
    Node* vec_field_ld = LoadNode::make(gvn,
                                        ctrl,
                                        mem,
                                        vec_adr,
                                        vec_adr->bottom_type()->is_ptr(),
                                        TypeOopPtr::make_from_klass(field->type()->as_klass()),
                                        T_OBJECT,
                                        MemNode::unordered);
    vec_field_ld = gvn.transform(vec_field_ld);

    // For proper aliasing, attach concrete payload type.
    ciKlass* payload_klass = ciTypeArrayKlass::make(bt);
    const Type* payload_type = TypeAryPtr::make_from_klass(payload_klass)->cast_to_ptr_type(TypePtr::NotNull);
    vec_field_ld = gvn.transform(new CastPPNode(vec_field_ld, payload_type));

    Node* adr = kit.array_element_address(vec_field_ld, gvn.intcon(0), bt);
    const TypePtr* adr_type = adr->bottom_type()->is_ptr();
    const TypeVect* vt = vec_unbox->bottom_type()->is_vect();
    int num_elem = vt->length();
    Node* vec_val_load = LoadVectorNode::make(0,
                                              ctrl,
                                              mem,
                                              adr,
                                              adr_type,
                                              num_elem,
                                              bt);
    vec_val_load = gvn.transform(vec_val_load);

    C->set_max_vector_size(MAX2(C->max_vector_size(), vt->length_in_bytes()));

    if (is_vector_mask(from_kls) && masktype != T_BOOLEAN) {
      assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect mask type consistency");
      vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::make(masktype, num_elem)));
    } else if (is_vector_shuffle(from_kls)) {
      if (vec_unbox->is_shuffle_to_vector() == false) {
        assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency");
        vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem)));
      } else if (elem_bt != T_BYTE) {
        vec_val_load = gvn.transform(VectorCastNode::make(Op_VectorCastB2X, vec_val_load, elem_bt, num_elem));
      }
    }

    gvn.hash_delete(vec_unbox);
    vec_unbox->disconnect_inputs(C);
    C->gvn_replace_by(vec_unbox, vec_val_load);
  }
  C->remove_macro_node(vec_unbox);
}

void PhaseVector::eliminate_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc) {
  JVMState* jvms = clone_jvms(C, vbox_alloc);
  GraphKit kit(jvms);
  // Remove VBA, but leave a safepoint behind.
  // Otherwise, it may end up with a loop without any safepoint polls.
  kit.replace_call(vbox_alloc, kit.map(), true);
  C->remove_macro_node(vbox_alloc);
}
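The new PhaseVector pass above removes the VectorBox/VectorUnbox macro nodes left behind once the jdk.incubator.vector intrinsics are inlined. For context, a minimal Java sketch (species choice and names are illustrative, not part of this patch) of the kind of loop whose intermediate vector boxes the phase eliminates:

    import jdk.incubator.vector.FloatVector;
    import jdk.incubator.vector.VectorSpecies;

    public class SaxpyDemo {
        static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;

        // At the Java level every intermediate FloatVector is a heap object;
        // after inlining, VectorUnbox(VectorBox(v)) pairs cancel and the loop
        // body stays in vector registers.
        static void saxpy(float a, float[] x, float[] y) {
            int i = 0;
            for (; i < SPECIES.loopBound(x.length); i += SPECIES.length()) {
                FloatVector vx = FloatVector.fromArray(SPECIES, x, i);
                FloatVector vy = FloatVector.fromArray(SPECIES, y, i);
                vx.fma(FloatVector.broadcast(SPECIES, a), vy).intoArray(y, i);
            }
            for (; i < x.length; i++) {   // scalar tail
                y[i] = a * x[i] + y[i];
            }
        }
    }

Boxes that still escape (to a safepoint, or to a non-inlined call under EnableVectorAggressiveReboxing) are scalarized or re-allocated at the escape point instead, which is what scalarize_vbox_node above handles.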
62 src/hotspot/share/opto/vector.hpp Normal file
@ -0,0 +1,62 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_OPTO_VECTOR_HPP
#define SHARE_OPTO_VECTOR_HPP

#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"

class PhaseVector : public Phase {
 private:
  PhaseIterGVN& _igvn;

  void expand_vbox_nodes();
  void expand_vbox_node(VectorBoxNode* vec_box);
  Node* expand_vbox_node_helper(Node* vbox,
                                Node* vect,
                                const TypeInstPtr* box_type,
                                const TypeVect* vect_type);
  Node* expand_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc,
                               Node* value,
                               const TypeInstPtr* box_type,
                               const TypeVect* vect_type);
  void scalarize_vbox_nodes();
  void scalarize_vbox_node(VectorBoxNode* vec_box);
  void expand_vunbox_nodes();
  void expand_vunbox_node(VectorUnboxNode* vec_box);
  void eliminate_vbox_alloc_nodes();
  void eliminate_vbox_alloc_node(VectorBoxAllocateNode* vbox_alloc);
  void do_cleanup();
  void scalarize_vector_boxes();
  void expand_vector_boxes();

 public:
  PhaseVector(PhaseIterGVN& igvn) : Phase(Vector), _igvn(igvn) {}
  void optimize_vector_boxes();
};

#endif // SHARE_OPTO_VECTOR_HPP
1594 src/hotspot/share/opto/vectorIntrinsics.cpp Normal file
File diff suppressed because it is too large
@ -120,12 +120,51 @@ int VectorNode::opcode(int sopc, BasicType bt) {
  case Op_AbsL:
    assert(bt == T_LONG, "must be");
    return Op_AbsVL;
  case Op_MinI:
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT: return Op_MinV;
    default: ShouldNotReachHere(); return 0;
    }
  case Op_MinL:
    assert(bt == T_LONG, "must be");
    return Op_MinV;
  case Op_MinF:
    assert(bt == T_FLOAT, "must be");
    return Op_MinV;
  case Op_MinD:
    assert(bt == T_DOUBLE, "must be");
    return Op_MinV;
  case Op_MaxI:
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT: return Op_MaxV;
    default: ShouldNotReachHere(); return 0;
    }
  case Op_MaxL:
    assert(bt == T_LONG, "must be");
    return Op_MaxV;
  case Op_MaxF:
    assert(bt == T_FLOAT, "must be");
    return Op_MaxV;
  case Op_MaxD:
    assert(bt == T_DOUBLE, "must be");
    return Op_MaxV;
  case Op_AbsF:
    assert(bt == T_FLOAT, "must be");
    return Op_AbsVF;
  case Op_AbsD:
    assert(bt == T_DOUBLE, "must be");
    return Op_AbsVD;
  case Op_NegI:
    assert(bt == T_INT, "must be");
    return Op_NegVI;
  case Op_NegF:
    assert(bt == T_FLOAT, "must be");
    return Op_NegVF;
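The Op_MinI/Op_MaxI mappings above deliberately return one Op_MinV/Op_MaxV for every subword integer type, so SuperWord and the Vector API intrinsics share a single node family. A hedged Java-level sketch of code that reaches these nodes (API usage only, not part of this diff; species choice illustrative):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorSpecies;

    public class ClampDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

        // Clamps each element of a[] into [lo, hi]; the broadcast min/max
        // lanewise operations compile down to MinV/MaxV nodes.
        static void clamp(int[] a, int lo, int hi, int[] out) {
            int i = 0;
            for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                IntVector v = IntVector.fromArray(SPECIES, a, i);
                v.max(lo).min(hi).intoArray(out, i);
            }
            for (; i < a.length; i++) {   // scalar tail
                out[i] = Math.min(Math.max(a[i], lo), hi);
            }
        }
    }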
@ -178,6 +217,12 @@ int VectorNode::opcode(int sopc, BasicType bt) {
  case Op_RShiftL:
    assert(bt == T_LONG, "must be");
    return Op_RShiftVL;
  case Op_URShiftB:
    assert(bt == T_BYTE, "must be");
    return Op_URShiftVB;
  case Op_URShiftS:
    assert(bt == T_SHORT, "must be");
    return Op_URShiftVS;
  case Op_URShiftI:
    switch (bt) {
    case T_BOOLEAN:return Op_URShiftVB;
@ -203,18 +248,6 @@ int VectorNode::opcode(int sopc, BasicType bt) {
  case Op_XorI:
  case Op_XorL:
    return Op_XorV;
  case Op_MinF:
    assert(bt == T_FLOAT, "must be");
    return Op_MinV;
  case Op_MinD:
    assert(bt == T_DOUBLE, "must be");
    return Op_MinV;
  case Op_MaxF:
    assert(bt == T_FLOAT, "must be");
    return Op_MaxV;
  case Op_MaxD:
    assert(bt == T_DOUBLE, "must be");
    return Op_MaxV;

  case Op_LoadB:
  case Op_LoadUB:
@ -241,6 +274,28 @@ int VectorNode::opcode(int sopc, BasicType bt) {
  }
}

int VectorNode::replicate_opcode(BasicType bt) {
  switch(bt) {
  case T_BOOLEAN:
  case T_BYTE:
    return Op_ReplicateB;
  case T_SHORT:
  case T_CHAR:
    return Op_ReplicateS;
  case T_INT:
    return Op_ReplicateI;
  case T_LONG:
    return Op_ReplicateL;
  case T_FLOAT:
    return Op_ReplicateF;
  case T_DOUBLE:
    return Op_ReplicateD;
  default:
    assert(false, "wrong type: %s", type2name(bt));
    return 0;
  }
}

// Also used to check if the code generator
// supports the vector operation.
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
@ -331,6 +386,16 @@ bool VectorNode::is_shift(Node* n) {
  }
}

bool VectorNode::is_vshift_cnt(Node* n) {
  switch (n->Opcode()) {
  case Op_LShiftCntV:
  case Op_RShiftCntV:
    return true;
  default:
    return false;
  }
}

// Check if input is loop invariant vector.
bool VectorNode::is_invariant_vector(Node* n) {
  // Only Replicate vector nodes are loop invariant for now.
@ -397,12 +462,10 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
  }
}

// Return the vector version of a scalar operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
  const TypeVect* vt = TypeVect::make(bt, vlen);
  int vopc = VectorNode::opcode(opc, bt);
// Make a vector node for binary operation
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt) {
  // This method should not be called for unimplemented vectors.
  guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
  guarantee(vopc > 0, "vopc must be > 0");
  switch (vopc) {
  case Op_AddVB: return new AddVBNode(n1, n2, vt);
  case Op_AddVS: return new AddVSNode(n1, n2, vt);
@ -428,13 +491,17 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
  case Op_DivVF: return new DivVFNode(n1, n2, vt);
  case Op_DivVD: return new DivVDNode(n1, n2, vt);

  case Op_MinV: return new MinVNode(n1, n2, vt);
  case Op_MaxV: return new MaxVNode(n1, n2, vt);

  case Op_AbsVF: return new AbsVFNode(n1, vt);
  case Op_AbsVD: return new AbsVDNode(n1, vt);
  case Op_AbsVB: return new AbsVBNode(n1, vt);
  case Op_AbsVS: return new AbsVSNode(n1, vt);
  case Op_AbsVI: return new AbsVINode(n1, vt);
  case Op_AbsVL: return new AbsVLNode(n1, vt);
  case Op_AbsVF: return new AbsVFNode(n1, vt);
  case Op_AbsVD: return new AbsVDNode(n1, vt);

  case Op_NegVI: return new NegVINode(n1, vt);
  case Op_NegVF: return new NegVFNode(n1, vt);
  case Op_NegVD: return new NegVDNode(n1, vt);

@ -464,9 +531,6 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
  case Op_OrV: return new OrVNode (n1, n2, vt);
  case Op_XorV: return new XorVNode(n1, n2, vt);

  case Op_MinV: return new MinVNode(n1, n2, vt);
  case Op_MaxV: return new MaxVNode(n1, n2, vt);

  case Op_RoundDoubleModeV: return new RoundDoubleModeVNode(n1, n2, vt);

  case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt);
@ -476,11 +540,19 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
  }
}

VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt) {
// Return the vector version of a scalar binary operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {
  const TypeVect* vt = TypeVect::make(bt, vlen);
  int vopc = VectorNode::opcode(opc, bt);
  // This method should not be called for unimplemented vectors.
  guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
  return make(vopc, n1, n2, vt);
}

// Make a vector node for ternary operation
VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt) {
  // This method should not be called for unimplemented vectors.
  guarantee(vopc > 0, "vopc must be > 0");
  switch (vopc) {
  case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt);
  case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt);
@ -490,6 +562,15 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, B
  }
}

// Return the vector version of a scalar ternary operation node.
VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt) {
  const TypeVect* vt = TypeVect::make(bt, vlen);
  int vopc = VectorNode::opcode(opc, bt);
  // This method should not be called for unimplemented vectors.
  guarantee(vopc > 0, "Vector for '%s' is not implemented", NodeClassNames[opc]);
  return make(vopc, n1, n2, n3, vt);
}

// Scalar promotion
VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
  BasicType bt = opd_t->array_element_basic_type();
@ -516,21 +597,22 @@ VectorNode* VectorNode::scalar2vector(Node* s, uint vlen, const Type* opd_t) {
  }
}

VectorNode* VectorNode::shift_count(Node* shift, Node* cnt, uint vlen, BasicType bt) {
  assert(VectorNode::is_shift(shift), "sanity");
VectorNode* VectorNode::shift_count(int opc, Node* cnt, uint vlen, BasicType bt) {
  // Match shift count type with shift vector type.
  const TypeVect* vt = TypeVect::make(bt, vlen);
  switch (shift->Opcode()) {
  switch (opc) {
  case Op_LShiftI:
  case Op_LShiftL:
    return new LShiftCntVNode(cnt, vt);
  case Op_RShiftI:
  case Op_RShiftL:
  case Op_URShiftB:
  case Op_URShiftS:
  case Op_URShiftI:
  case Op_URShiftL:
    return new RShiftCntVNode(cnt, vt);
  default:
    fatal("Missed vector creation for '%s'", NodeClassNames[shift->Opcode()]);
    fatal("Missed vector creation for '%s'", NodeClassNames[opc]);
    return NULL;
  }
}
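shift_count now takes the scalar opcode directly, so callers no longer need the original shift node; it materializes a single LShiftCntV/RShiftCntV broadcast of the count that all lanes share. A hedged Java sketch of code that produces such a shared count (illustrative only, not part of this diff):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class ShiftDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        // Every lane is shifted by the same scalar count, so C2 can emit one
        // shift-count node feeding the vector shift. Scalar tail omitted.
        static void shiftAll(int[] a, int n, int[] out) {
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                IntVector.fromArray(SPECIES, a, i)
                         .lanewise(VectorOperators.LSHL, n)
                         .intoArray(out, i);
            }
        }
    }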
@ -677,29 +759,37 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem,
  return new StoreVectorNode(ctl, mem, adr, atyp, val);
}

int ExtractNode::opcode(BasicType bt) {
  switch (bt) {
  case T_BOOLEAN: return Op_ExtractUB;
  case T_BYTE:    return Op_ExtractB;
  case T_CHAR:    return Op_ExtractC;
  case T_SHORT:   return Op_ExtractS;
  case T_INT:     return Op_ExtractI;
  case T_LONG:    return Op_ExtractL;
  case T_FLOAT:   return Op_ExtractF;
  case T_DOUBLE:  return Op_ExtractD;
  default:
    assert(false, "wrong type: %s", type2name(bt));
    return 0;
  }
}

// Extract a scalar element of vector.
Node* ExtractNode::make(Node* v, uint position, BasicType bt) {
  assert((int)position < Matcher::max_vector_size(bt), "pos in range");
  ConINode* pos = ConINode::make((int)position);
  switch (bt) {
  case T_BOOLEAN:
    return new ExtractUBNode(v, pos);
  case T_BYTE:
    return new ExtractBNode(v, pos);
  case T_CHAR:
    return new ExtractCNode(v, pos);
  case T_SHORT:
    return new ExtractSNode(v, pos);
  case T_INT:
    return new ExtractINode(v, pos);
  case T_LONG:
    return new ExtractLNode(v, pos);
  case T_FLOAT:
    return new ExtractFNode(v, pos);
  case T_DOUBLE:
    return new ExtractDNode(v, pos);
  case T_BOOLEAN: return new ExtractUBNode(v, pos);
  case T_BYTE:    return new ExtractBNode(v, pos);
  case T_CHAR:    return new ExtractCNode(v, pos);
  case T_SHORT:   return new ExtractSNode(v, pos);
  case T_INT:     return new ExtractINode(v, pos);
  case T_LONG:    return new ExtractLNode(v, pos);
  case T_FLOAT:   return new ExtractFNode(v, pos);
  case T_DOUBLE:  return new ExtractDNode(v, pos);
  default:
    fatal("Type '%s' is not supported for vectors", type2name(bt));
    assert(false, "wrong type: %s", type2name(bt));
    return NULL;
  }
}
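ExtractNode::make backs single-lane reads such as Vector.lane(i). A small illustrative sketch (not part of this diff; species and data are arbitrary):

    import jdk.incubator.vector.IntVector;

    public class LaneDemo {
        public static void main(String[] args) {
            int[] data = {1, 2, 3, 4, 5, 6, 7, 8};
            IntVector v = IntVector.fromArray(IntVector.SPECIES_256, data, 0);
            // v.lane(i) becomes an ExtractI node when intrinsified.
            System.out.println(v.lane(3));   // prints 4
        }
    }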
@ -708,8 +798,16 @@ int ReductionNode::opcode(int opc, BasicType bt) {
  int vopc = opc;
  switch (opc) {
  case Op_AddI:
    assert(bt == T_INT, "must be");
    vopc = Op_AddReductionVI;
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_AddReductionVI;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_AddL:
    assert(bt == T_LONG, "must be");
@ -724,8 +822,16 @@ int ReductionNode::opcode(int opc, BasicType bt) {
    vopc = Op_AddReductionVD;
    break;
  case Op_MulI:
    assert(bt == T_INT, "must be");
    vopc = Op_MulReductionVI;
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_MulReductionVI;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_MulL:
    assert(bt == T_LONG, "must be");
@ -739,6 +845,22 @@ int ReductionNode::opcode(int opc, BasicType bt) {
    assert(bt == T_DOUBLE, "must be");
    vopc = Op_MulReductionVD;
    break;
  case Op_MinI:
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_MinReductionV;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_MinL:
    assert(bt == T_LONG, "must be");
    vopc = Op_MinReductionV;
    break;
  case Op_MinF:
    assert(bt == T_FLOAT, "must be");
    vopc = Op_MinReductionV;
@ -747,6 +869,22 @@ int ReductionNode::opcode(int opc, BasicType bt) {
    assert(bt == T_DOUBLE, "must be");
    vopc = Op_MinReductionV;
    break;
  case Op_MaxI:
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_MaxReductionV;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_MaxL:
    assert(bt == T_LONG, "must be");
    vopc = Op_MaxReductionV;
    break;
  case Op_MaxF:
    assert(bt == T_FLOAT, "must be");
    vopc = Op_MaxReductionV;
@ -756,24 +894,48 @@ int ReductionNode::opcode(int opc, BasicType bt) {
    vopc = Op_MaxReductionV;
    break;
  case Op_AndI:
    assert(bt == T_INT, "must be");
    vopc = Op_AndReductionV;
    switch (bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_AndReductionV;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_AndL:
    assert(bt == T_LONG, "must be");
    vopc = Op_AndReductionV;
    break;
  case Op_OrI:
    assert(bt == T_INT, "must be");
    vopc = Op_OrReductionV;
    switch(bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_OrReductionV;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_OrL:
    assert(bt == T_LONG, "must be");
    vopc = Op_OrReductionV;
    break;
  case Op_XorI:
    assert(bt == T_INT, "must be");
    vopc = Op_XorReductionV;
    switch(bt) {
    case T_BOOLEAN:
    case T_CHAR: return 0;
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      vopc = Op_XorReductionV;
      break;
    default: ShouldNotReachHere(); return 0;
    }
    break;
  case Op_XorL:
    assert(bt == T_LONG, "must be");
@ -808,11 +970,116 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi
  case Op_OrReductionV: return new OrReductionVNode(ctrl, n1, n2);
  case Op_XorReductionV: return new XorReductionVNode(ctrl, n1, n2);
  default:
    fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
    assert(false, "unknown node: %s", NodeClassNames[vopc]);
    return NULL;
  }
}

VectorStoreMaskNode* VectorStoreMaskNode::make(PhaseGVN& gvn, Node* in, BasicType in_type, uint num_elem) {
  assert(in->bottom_type()->isa_vect(), "sanity");
  const TypeVect* vt = TypeVect::make(T_BOOLEAN, num_elem);
  int elem_size = type2aelembytes(in_type);
  return new VectorStoreMaskNode(in, gvn.intcon(elem_size), vt);
}

VectorCastNode* VectorCastNode::make(int vopc, Node* n1, BasicType bt, uint vlen) {
  const TypeVect* vt = TypeVect::make(bt, vlen);
  switch (vopc) {
  case Op_VectorCastB2X: return new VectorCastB2XNode(n1, vt);
  case Op_VectorCastS2X: return new VectorCastS2XNode(n1, vt);
  case Op_VectorCastI2X: return new VectorCastI2XNode(n1, vt);
  case Op_VectorCastL2X: return new VectorCastL2XNode(n1, vt);
  case Op_VectorCastF2X: return new VectorCastF2XNode(n1, vt);
  case Op_VectorCastD2X: return new VectorCastD2XNode(n1, vt);
  default:
    assert(false, "unknown node: %s", NodeClassNames[vopc]);
    return NULL;
  }
}

int VectorCastNode::opcode(BasicType bt) {
  switch (bt) {
  case T_BYTE:   return Op_VectorCastB2X;
  case T_SHORT:  return Op_VectorCastS2X;
  case T_INT:    return Op_VectorCastI2X;
  case T_LONG:   return Op_VectorCastL2X;
  case T_FLOAT:  return Op_VectorCastF2X;
  case T_DOUBLE: return Op_VectorCastD2X;
  default:
    assert(false, "unknown type: %s", type2name(bt));
    return 0;
  }
}

Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) {
  int vopc = opcode(opc, bt);
  guarantee(vopc != opc, "Vector reduction for '%s' is not implemented", NodeClassNames[opc]);

  switch (vopc) {
  case Op_AndReductionV:
    switch (bt) {
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      return gvn.makecon(TypeInt::MINUS_1);
    case T_LONG:
      return gvn.makecon(TypeLong::MINUS_1);
    default:
      fatal("Missed vector creation for '%s' as the basic type is not correct.", NodeClassNames[vopc]);
      return NULL;
    }
    break;
  case Op_AddReductionVI: // fallthrough
  case Op_AddReductionVL: // fallthrough
  case Op_AddReductionVF: // fallthrough
  case Op_AddReductionVD:
  case Op_OrReductionV:
  case Op_XorReductionV:
    return gvn.zerocon(bt);
  case Op_MulReductionVI:
    return gvn.makecon(TypeInt::ONE);
  case Op_MulReductionVL:
    return gvn.makecon(TypeLong::ONE);
  case Op_MulReductionVF:
    return gvn.makecon(TypeF::ONE);
  case Op_MulReductionVD:
    return gvn.makecon(TypeD::ONE);
  case Op_MinReductionV:
    switch (bt) {
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      return gvn.makecon(TypeInt::MAX);
    case T_LONG:
      return gvn.makecon(TypeLong::MAX);
    case T_FLOAT:
      return gvn.makecon(TypeF::POS_INF);
    case T_DOUBLE:
      return gvn.makecon(TypeD::POS_INF);
    default: Unimplemented(); return NULL;
    }
    break;
  case Op_MaxReductionV:
    switch (bt) {
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      return gvn.makecon(TypeInt::MIN);
    case T_LONG:
      return gvn.makecon(TypeLong::MIN);
    case T_FLOAT:
      return gvn.makecon(TypeF::NEG_INF);
    case T_DOUBLE:
      return gvn.makecon(TypeD::NEG_INF);
    default: Unimplemented(); return NULL;
    }
    break;
  default:
    fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
    return NULL;
  }
}
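make_reduction_input supplies the identity element for each reduction (0 for add/or/xor, 1 for mul, all-ones for and, the type's extreme values or infinities for min/max), so seeding a reduction with it never changes the result. A hedged Java sketch of a user-level reduction this feeds (illustrative only, not part of this diff):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class ReduceDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        // Min-reduction seeded with the MIN identity, Integer.MAX_VALUE,
        // mirroring the Op_MinReductionV case above; scalar tail for the rest.
        static int minAll(int[] a) {
            int min = Integer.MAX_VALUE;
            int i = 0;
            for (; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                min = Math.min(min, IntVector.fromArray(SPECIES, a, i)
                                             .reduceLanes(VectorOperators.MIN));
            }
            for (; i < a.length; i++) {
                min = Math.min(min, a[i]);
            }
            return min;
        }
    }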
bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
  if (is_java_primitive(bt) &&
      (vlen > 1) && is_power_of_2(vlen) &&
@ -824,7 +1091,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
}

MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* in1, Node* in2, Node* in3,
                                       uint truth_table, const TypeVect* vt) {
  assert(truth_table <= 0xFF, "invalid");
  assert(in1->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
  assert(in2->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
@ -895,3 +1162,51 @@ Node* RotateRightVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  return NULL;
}

#ifndef PRODUCT
void VectorMaskCmpNode::dump_spec(outputStream *st) const {
  st->print(" %d #", _predicate); _type->dump_on(st);
}
#endif // PRODUCT

Node* VectorReinterpretNode::Identity(PhaseGVN *phase) {
  Node* n = in(1);
  if (n->Opcode() == Op_VectorReinterpret) {
    if (Type::cmp(bottom_type(), n->in(1)->bottom_type()) == 0) {
      return n->in(1);
    }
  }
  return this;
}

Node* VectorInsertNode::make(Node* vec, Node* new_val, int position) {
  assert(position < (int)vec->bottom_type()->is_vect()->length(), "pos in range");
  ConINode* pos = ConINode::make(position);
  return new VectorInsertNode(vec, new_val, pos, vec->bottom_type()->is_vect());
}

Node* VectorUnboxNode::Identity(PhaseGVN *phase) {
  Node* n = obj()->uncast();
  if (EnableVectorReboxing && n->Opcode() == Op_VectorBox) {
    if (Type::cmp(bottom_type(), n->in(VectorBoxNode::Value)->bottom_type()) == 0) {
      return n->in(VectorBoxNode::Value);
    }
  }
  return this;
}

const TypeFunc* VectorBoxNode::vec_box_type(const TypeInstPtr* box_type) {
  const Type** fields = TypeTuple::fields(0);
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);

  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms+0] = box_type;
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);

  return TypeFunc::make(domain, range);
}

#ifndef PRODUCT
void VectorBoxAllocateNode::dump_spec(outputStream *st) const {
  CallStaticJavaNode::dump_spec(st);
}
#endif // !PRODUCT
@ -24,6 +24,7 @@
#ifndef SHARE_OPTO_VECTORNODE_HPP
#define SHARE_OPTO_VECTORNODE_HPP

#include "opto/callnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
@ -68,13 +69,17 @@ class VectorNode : public TypeNode {
  virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }

  static VectorNode* scalar2vector(Node* s, uint vlen, const Type* opd_t);
  static VectorNode* shift_count(Node* shift, Node* cnt, uint vlen, BasicType bt);
  static VectorNode* shift_count(int opc, Node* cnt, uint vlen, BasicType bt);
  static VectorNode* make(int opc, Node* n1, Node* n2, uint vlen, BasicType bt);
  static VectorNode* make(int vopc, Node* n1, Node* n2, const TypeVect* vt);
  static VectorNode* make(int opc, Node* n1, Node* n2, Node* n3, uint vlen, BasicType bt);
  static VectorNode* make(int vopc, Node* n1, Node* n2, Node* n3, const TypeVect* vt);

  static int opcode(int opc, BasicType bt);
  static int replicate_opcode(BasicType bt);
  static bool implemented(int opc, uint vlen, BasicType bt);
  static bool is_shift(Node* n);
  static bool is_vshift_cnt(Node* n);
  static bool is_type_transition_short_to_int(Node* n);
  static bool is_type_transition_to_int(Node* n);
  static bool is_muladds2i(Node* n);
@ -160,9 +165,10 @@ class ReductionNode : public Node {
  static ReductionNode* make(int opc, Node *ctrl, Node* in1, Node* in2, BasicType bt);
  static int opcode(int opc, BasicType bt);
  static bool implemented(int opc, uint vlen, BasicType bt);
  static Node* make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt);

  virtual const Type* bottom_type() const {
    BasicType vbt = in(2)->bottom_type()->is_vect()->element_basic_type();
    BasicType vbt = in(1)->bottom_type()->basic_type();
    return Type::get_const_basic_type(vbt);
  }

@ -172,13 +178,11 @@
};

//------------------------------AddReductionVINode--------------------------------------
// Vector add int as a reduction
// Vector add byte, short and int as a reduction
class AddReductionVINode : public ReductionNode {
 public:
  AddReductionVINode(Node * ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------AddReductionVLNode--------------------------------------
@ -187,8 +191,6 @@ class AddReductionVLNode : public ReductionNode {
 public:
  AddReductionVLNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
};

//------------------------------AddReductionVFNode--------------------------------------
@ -197,8 +199,6 @@ class AddReductionVFNode : public ReductionNode {
 public:
  AddReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return Type::FLOAT; }
  virtual uint ideal_reg() const { return Op_RegF; }
};

//------------------------------AddReductionVDNode--------------------------------------
@ -207,8 +207,6 @@ class AddReductionVDNode : public ReductionNode {
 public:
  AddReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
};

//------------------------------SubVBNode--------------------------------------
@ -348,13 +346,11 @@ public:
};

//------------------------------MulReductionVINode--------------------------------------
// Vector multiply int as a reduction
// Vector multiply byte, short and int as a reduction
class MulReductionVINode : public ReductionNode {
 public:
  MulReductionVINode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------MulReductionVLNode--------------------------------------
@ -363,8 +359,6 @@ class MulReductionVLNode : public ReductionNode {
 public:
  MulReductionVLNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------MulReductionVFNode--------------------------------------
@ -373,8 +367,6 @@ class MulReductionVFNode : public ReductionNode {
 public:
  MulReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return Type::FLOAT; }
  virtual uint ideal_reg() const { return Op_RegF; }
};

//------------------------------MulReductionVDNode--------------------------------------
@ -383,8 +375,6 @@ class MulReductionVDNode : public ReductionNode {
 public:
  MulReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
};

//------------------------------DivVFNode--------------------------------------
@ -419,10 +409,26 @@ public:
  virtual int Opcode() const;
};

//------------------------------MinVNode--------------------------------------
// Vector Min
class MinVNode : public VectorNode {
 public:
  MinVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
  virtual int Opcode() const;
};

//------------------------------MaxVNode--------------------------------------
// Vector Max
class MaxVNode : public VectorNode {
 public:
  MaxVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
  virtual int Opcode() const;
};

//------------------------------AbsVINode--------------------------------------
// Vector Abs int
class AbsVINode : public VectorNode {
 public:
  AbsVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
  virtual int Opcode() const;
};
@ -451,6 +457,14 @@ class AbsVDNode : public VectorNode {
  virtual int Opcode() const;
};

//------------------------------NegVINode--------------------------------------
// Vector Neg int
class NegVINode : public VectorNode {
 public:
  NegVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
  virtual int Opcode() const;
};

//------------------------------NegVFNode--------------------------------------
// Vector Neg float
class NegVFNode : public VectorNode {
@ -618,14 +632,38 @@ class AndVNode : public VectorNode {
  virtual int Opcode() const;
};
//------------------------------AndReductionVNode--------------------------------------
// Vector and byte, short, int, long as a reduction
class AndReductionVNode : public ReductionNode {
 public:
  AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------OrVNode---------------------------------------
// Vector or integer
// Vector or byte, short, int, long
class OrVNode : public VectorNode {
 public:
  OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
  virtual int Opcode() const;
};

//------------------------------OrReductionVNode--------------------------------------
// Vector or byte, short, int, long as a reduction
class OrReductionVNode : public ReductionNode {
 public:
  OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------XorReductionVNode--------------------------------------
// Vector xor byte, short, int, long as a reduction
class XorReductionVNode : public ReductionNode {
 public:
  XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------XorVNode---------------------------------------
// Vector xor integer
class XorVNode : public VectorNode {
@ -634,48 +672,8 @@ class XorVNode : public VectorNode {
  virtual int Opcode() const;
};

//------------------------------AndReductionVNode--------------------------------------
// Vector and int, long as a reduction
class AndReductionVNode : public ReductionNode {
 public:
  AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------OrReductionVNode--------------------------------------
// Vector or int, long as a reduction
class OrReductionVNode : public ReductionNode {
 public:
  OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------XorReductionVNode--------------------------------------
// Vector xor int, long as a reduction
class XorReductionVNode : public ReductionNode {
 public:
  XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
  virtual int Opcode() const;
};

//------------------------------MinVNode--------------------------------------
// Vector min
class MinVNode : public VectorNode {
 public:
  MinVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
  virtual int Opcode() const;
};

//------------------------------MaxVNode--------------------------------------
// Vector max
class MaxVNode : public VectorNode {
 public:
  MaxVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
  virtual int Opcode() const;
};

//------------------------------MinReductionVNode--------------------------------------
// Vector min as a reduction
// Vector min byte, short, int, long, float, double as a reduction
class MinReductionVNode : public ReductionNode {
 public:
  MinReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
@ -683,7 +681,7 @@ public:
};

//------------------------------MaxReductionVNode--------------------------------------
// Vector max as a reduction
// Vector max byte, short, int, long, float, double as a reduction
class MaxReductionVNode : public ReductionNode {
 public:
  MaxReductionVNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {}
@ -720,13 +718,28 @@ class LoadVectorNode : public LoadNode {
  uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
};

//------------------------------LoadVectorGatherNode------------------------------
// Load Vector from memory via index map
class LoadVectorGatherNode : public LoadVectorNode {
 public:
  LoadVectorGatherNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices)
    : LoadVectorNode(c, mem, adr, at, vt) {
    init_class_id(Class_LoadVectorGather);
    assert(indices->bottom_type()->is_vect(), "indices must be in vector");
    add_req(indices);
    assert(req() == MemNode::ValueIn + 1, "match_edge expects that last input is in MemNode::ValueIn");
  }

  virtual int Opcode() const;
  virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
};

//------------------------------StoreVectorNode--------------------------------
// Store Vector to memory
class StoreVectorNode : public StoreNode {
 public:
  StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
    : StoreNode(c, mem, adr, at, val, MemNode::unordered) {
    assert(val->is_Vector() || val->is_LoadVector(), "sanity");
    init_class_id(Class_StoreVector);
    set_mismatched_access();
  }
@ -747,6 +760,23 @@ class StoreVectorNode : public StoreNode {
  uint element_size(void) { return type2aelembytes(vect_type()->element_basic_type()); }
};

//------------------------------StoreVectorScatterNode------------------------------
// Store Vector into memory via index map

class StoreVectorScatterNode : public StoreVectorNode {
 public:
  StoreVectorScatterNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val, Node* indices)
    : StoreVectorNode(c, mem, adr, at, val) {
    init_class_id(Class_StoreVectorScatter);
    assert(indices->bottom_type()->is_vect(), "indices must be in vector");
    add_req(indices);
    assert(req() == MemNode::ValueIn + 2, "match_edge expects that last input is in MemNode::ValueIn+1");
  }
  virtual int Opcode() const;
  virtual uint match_edge(uint idx) const { return idx == MemNode::Address ||
                                                   idx == MemNode::ValueIn ||
                                                   idx == MemNode::ValueIn + 1; }
};

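The gather/scatter nodes keep the index map as a trailing input so match_edge can treat it like a value operand. An illustrative Java-level gather and scatter that these nodes implement (not part of this diff; index values arbitrary):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorSpecies;

    public class GatherDemo {
        static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;

        public static void main(String[] args) {
            int[] table = {10, 20, 30, 40, 50, 60, 70, 80};
            int[] index = { 7,  0,  3,  3,  1,  6,  2,  5};
            // Gather: lane i loads table[0 + index[i]]; the store below is
            // the scatter dual through the same index map.
            IntVector v = IntVector.fromArray(SPECIES, table, 0, index, 0);
            v.intoArray(table, 0, index, 0);
            System.out.println(v);
        }
    }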
//=========================Promote_Scalar_to_Vector============================

@ -888,6 +918,12 @@ class Pack2DNode : public PackNode {
};


class VectorLoadConstNode : public VectorNode {
 public:
  VectorLoadConstNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
  virtual int Opcode() const;
};

//========================Extract_Scalar_from_Vector===========================

//------------------------------ExtractNode------------------------------------
@ -901,6 +937,7 @@ class ExtractNode : public Node {
  uint pos() const { return in(2)->get_int(); }

  static Node* make(Node* v, uint position, BasicType bt);
  static int opcode(BasicType bt);
};

//------------------------------ExtractBNode-----------------------------------
@ -929,7 +966,7 @@ class ExtractCNode : public ExtractNode {
 public:
  ExtractCNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
  virtual int Opcode() const;
  virtual const Type *bottom_type() const { return TypeInt::INT; }
  virtual const Type *bottom_type() const { return TypeInt::CHAR; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

@ -939,7 +976,7 @@ class ExtractSNode : public ExtractNode {
 public:
  ExtractSNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
  virtual int Opcode() const;
  virtual const Type *bottom_type() const { return TypeInt::INT; }
  virtual const Type *bottom_type() const { return TypeInt::SHORT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

@ -1007,6 +1044,286 @@ public:
  static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3, uint truth_table, const TypeVect* vt);
};

class VectorMaskCmpNode : public VectorNode {
 private:
  BoolTest::mask _predicate;

 protected:
  uint size_of() const { return sizeof(*this); }

 public:
  VectorMaskCmpNode(BoolTest::mask predicate, Node* in1, Node* in2, ConINode* predicate_node, const TypeVect* vt) :
      VectorNode(in1, in2, predicate_node, vt),
      _predicate(predicate) {
    assert(in1->bottom_type()->is_vect()->element_basic_type() == in2->bottom_type()->is_vect()->element_basic_type(),
           "VectorMaskCmp inputs must have same type for elements");
    assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(),
           "VectorMaskCmp inputs must have same number of elements");
    init_class_id(Class_VectorMaskCmp);
  }

  virtual int Opcode() const;
  virtual uint hash() const { return VectorNode::hash() + _predicate; }
  virtual bool cmp( const Node &n ) const {
    return VectorNode::cmp(n) && _predicate == ((VectorMaskCmpNode&)n)._predicate;
  }
  BoolTest::mask get_predicate() { return _predicate; }
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif // !PRODUCT
};

// Used to wrap other vector nodes in order to add masking functionality.
class VectorMaskWrapperNode : public VectorNode {
 public:
  VectorMaskWrapperNode(Node* vector, Node* mask)
    : VectorNode(vector, mask, vector->bottom_type()->is_vect()) {
    assert(mask->is_VectorMaskCmp(), "VectorMaskWrapper requires that second argument be a mask");
  }

  virtual int Opcode() const;
  Node* vector_val()  const { return in(1); }
  Node* vector_mask() const { return in(2); }
};

class VectorTestNode : public Node {
 private:
  BoolTest::mask _predicate;

 protected:
  uint size_of() const { return sizeof(*this); }

 public:
  VectorTestNode( Node *in1, Node *in2, BoolTest::mask predicate) : Node(NULL, in1, in2), _predicate(predicate) {
    assert(in1->is_Vector() || in1->is_LoadVector(), "must be vector");
    assert(in2->is_Vector() || in2->is_LoadVector(), "must be vector");
    assert(in1->bottom_type()->is_vect()->element_basic_type() == in2->bottom_type()->is_vect()->element_basic_type(),
           "same type elements are needed");
    assert(in1->bottom_type()->is_vect()->length() == in2->bottom_type()->is_vect()->length(),
           "same number of elements is needed");
  }
  virtual int Opcode() const;
  virtual uint hash() const { return Node::hash() + _predicate; }
  virtual bool cmp( const Node &n ) const {
    return Node::cmp(n) && _predicate == ((VectorTestNode&)n)._predicate;
  }
  virtual const Type *bottom_type() const { return TypeInt::BOOL; }
  virtual uint ideal_reg() const { return Op_RegI; }  // TODO Should be RegFlags but due to missing comparison flags for BoolTest
                                                      // in middle-end, we make it boolean result directly.
  BoolTest::mask get_predicate() const { return _predicate; }
};

class VectorBlendNode : public VectorNode {
 public:
  VectorBlendNode(Node* vec1, Node* vec2, Node* mask)
    : VectorNode(vec1, vec2, mask, vec1->bottom_type()->is_vect()) {
    // assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
  }

  virtual int Opcode() const;
  Node* vec1() const { return in(1); }
  Node* vec2() const { return in(2); }
  Node* vec_mask() const { return in(3); }
};

class VectorRearrangeNode : public VectorNode {
 public:
  VectorRearrangeNode(Node* vec1, Node* shuffle)
    : VectorNode(vec1, shuffle, vec1->bottom_type()->is_vect()) {
    // assert(mask->is_VectorMask(), "VectorBlendNode requires that third argument be a mask");
  }

  virtual int Opcode() const;
  Node* vec1() const { return in(1); }
  Node* vec_shuffle() const { return in(2); }
};

class VectorLoadMaskNode : public VectorNode {
|
||||
public:
|
||||
VectorLoadMaskNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {
|
||||
assert(in->is_LoadVector(), "expected load vector");
|
||||
assert(in->as_LoadVector()->vect_type()->element_basic_type() == T_BOOLEAN, "must be boolean");
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorLoadShuffleNode : public VectorNode {
|
||||
public:
|
||||
VectorLoadShuffleNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {
|
||||
assert(in->is_LoadVector(), "expected load vector");
|
||||
assert(in->as_LoadVector()->vect_type()->element_basic_type() == T_BYTE, "must be BYTE");
|
||||
}
|
||||
|
||||
int GetOutShuffleSize() const { return type2aelembytes(vect_type()->element_basic_type()); }
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorStoreMaskNode : public VectorNode {
|
||||
protected:
|
||||
VectorStoreMaskNode(Node* in1, ConINode* in2, const TypeVect* vt)
|
||||
: VectorNode(in1, in2, vt) { }
|
||||
|
||||
public:
|
||||
virtual int Opcode() const;
|
||||
|
||||
static VectorStoreMaskNode* make(PhaseGVN& gvn, Node* in, BasicType in_type, uint num_elem);
|
||||
};
|
||||
|
||||
// This is intended for use as a simple reinterpret node that has no cast.
|
||||
class VectorReinterpretNode : public VectorNode {
|
||||
private:
|
||||
const TypeVect* _src_vt;
|
||||
protected:
|
||||
uint size_of() const { return sizeof(*this); }
|
||||
public:
|
||||
VectorReinterpretNode(Node* in, const TypeVect* src_vt, const TypeVect* dst_vt)
|
||||
: VectorNode(in, dst_vt), _src_vt(src_vt) { }
|
||||
|
||||
virtual uint hash() const { return VectorNode::hash() + _src_vt->hash(); }
|
||||
virtual bool cmp( const Node &n ) const {
|
||||
return VectorNode::cmp(n) && !Type::cmp(_src_vt,((VectorReinterpretNode&)n)._src_vt);
|
||||
}
|
||||
virtual Node *Identity(PhaseGVN *phase);
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastNode : public VectorNode {
|
||||
public:
|
||||
VectorCastNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
|
||||
virtual int Opcode() const;
|
||||
|
||||
static VectorCastNode* make(int vopc, Node* n1, BasicType bt, uint vlen);
|
||||
static int opcode(BasicType bt);
|
||||
static bool implemented(BasicType bt, uint vlen);
|
||||
};
|
||||
|
||||
class VectorCastB2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_BYTE, "must be byte");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastS2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastI2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastI2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_INT, "must be int");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastL2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastL2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_LONG, "must be long");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastF2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastF2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorCastD2XNode : public VectorCastNode {
|
||||
public:
|
||||
VectorCastD2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class VectorInsertNode : public VectorNode {
|
||||
public:
|
||||
VectorInsertNode(Node* vsrc, Node* new_val, ConINode* pos, const TypeVect* vt) : VectorNode(vsrc, new_val, (Node*)pos, vt) {
|
||||
assert(pos->get_int() >= 0, "positive constants");
|
||||
assert(pos->get_int() < (int)vt->length(), "index must be less than vector length");
|
||||
assert(Type::cmp(vt, vsrc->bottom_type()) == 0, "input and output must be same type");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
uint pos() const { return in(3)->get_int(); }
|
||||
|
||||
static Node* make(Node* vec, Node* new_val, int position);
|
||||
};
|
||||
|
||||
class VectorBoxNode : public Node {
|
||||
private:
|
||||
const TypeInstPtr* const _box_type;
|
||||
const TypeVect* const _vec_type;
|
||||
public:
|
||||
enum {
|
||||
Box = 1,
|
||||
Value = 2
|
||||
};
|
||||
VectorBoxNode(Compile* C, Node* box, Node* val,
|
||||
const TypeInstPtr* box_type, const TypeVect* vt)
|
||||
: Node(NULL, box, val), _box_type(box_type), _vec_type(vt) {
|
||||
init_flags(Flag_is_macro);
|
||||
C->add_macro_node(this);
|
||||
}
|
||||
|
||||
const TypeInstPtr* box_type() const { assert(_box_type != NULL, ""); return _box_type; };
|
||||
const TypeVect* vec_type() const { assert(_vec_type != NULL, ""); return _vec_type; };
|
||||
|
||||
virtual int Opcode() const;
|
||||
virtual const Type* bottom_type() const { return _box_type; }
|
||||
virtual uint ideal_reg() const { return box_type()->ideal_reg(); }
|
||||
virtual uint size_of() const { return sizeof(*this); }
|
||||
|
||||
static const TypeFunc* vec_box_type(const TypeInstPtr* box_type);
|
||||
};
|
||||
|
||||
class VectorBoxAllocateNode : public CallStaticJavaNode {
|
||||
public:
|
||||
VectorBoxAllocateNode(Compile* C, const TypeInstPtr* vbox_type)
|
||||
: CallStaticJavaNode(C, VectorBoxNode::vec_box_type(vbox_type), NULL, NULL, -1) {
|
||||
init_flags(Flag_is_macro);
|
||||
C->add_macro_node(this);
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
#ifndef PRODUCT
|
||||
virtual void dump_spec(outputStream *st) const;
|
||||
#endif // !PRODUCT
|
||||
};
|
||||
|
||||
class VectorUnboxNode : public VectorNode {
|
||||
private:
|
||||
bool _shuffle_to_vector;
|
||||
protected:
|
||||
uint size_of() const { return sizeof(*this); }
|
||||
public:
|
||||
VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem, bool shuffle_to_vector)
|
||||
: VectorNode(mem, obj, vec_type) {
|
||||
_shuffle_to_vector = shuffle_to_vector;
|
||||
init_flags(Flag_is_macro);
|
||||
C->add_macro_node(this);
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
Node* obj() const { return in(2); }
|
||||
Node* mem() const { return in(1); }
|
||||
virtual Node *Identity(PhaseGVN *phase);
|
||||
bool is_shuffle_to_vector() { return _shuffle_to_vector; }
|
||||
};
|
||||
|
||||
class RotateRightVNode : public VectorNode {
|
||||
public:
|
||||
RotateRightVNode(Node* in1, Node* in2, const TypeVect* vt)
|
||||
|
||||
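The node classes above are the C2 IR counterparts of the user-visible mask, compare, blend and rearrange operations. A hedged end-to-end sketch using the incubator API these nodes serve (class name BlendDemo is illustrative; run with --add-modules jdk.incubator.vector):

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class BlendDemo {
    static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_256;

    public static void main(String[] args) {
        FloatVector a = FloatVector.broadcast(SPECIES, 1.0f);
        FloatVector b = FloatVector.broadcast(SPECIES, 2.0f);
        VectorMask<Float> m = a.compare(VectorOperators.LT, b); // lowers to VectorMaskCmp
        FloatVector c = a.blend(b, m);                          // lowers to VectorBlend
        System.out.println(c);                                  // all lanes 2.0
    }
}
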
@ -118,6 +118,7 @@ extern "C" {
  void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
  void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
  void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
  void JNICALL JVM_RegisterVectorSupportMethods(JNIEnv *env, jclass vsclass);
#if INCLUDE_JVMCI
  jobject JNICALL JVM_GetJVMCIRuntime(JNIEnv *env, jclass c);
  void JNICALL JVM_RegisterJVMCINatives(JNIEnv *env, jclass compilerToVMClass);
@ -132,6 +133,7 @@ static JNINativeMethod lookup_special_native_methods[] = {
  { CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
  { CC"Java_jdk_internal_perf_Perf_registerNatives",               NULL, FN_PTR(JVM_RegisterPerfMethods)          },
  { CC"Java_sun_hotspot_WhiteBox_registerNatives",                 NULL, FN_PTR(JVM_RegisterWhiteBoxMethods)      },
  { CC"Java_jdk_internal_vm_vector_VectorSupport_registerNatives", NULL, FN_PTR(JVM_RegisterVectorSupportMethods) },
#if INCLUDE_JVMCI
  { CC"Java_jdk_vm_ci_runtime_JVMCI_initializeRuntime",            NULL, FN_PTR(JVM_GetJVMCIRuntime)              },
  { CC"Java_jdk_vm_ci_hotspot_CompilerToVM_registerNatives",       NULL, FN_PTR(JVM_RegisterJVMCINatives)         },

src/hotspot/share/prims/vectorSupport.cpp (new file, 429 lines)
@ -0,0 +1,429 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "jni.h"
#include "jvm.h"
#include "classfile/javaClasses.inline.hpp"
#include "code/location.hpp"
#include "prims/vectorSupport.hpp"
#include "runtime/fieldDescriptor.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/stackValue.hpp"

#ifdef COMPILER2
#include "opto/matcher.hpp" // Matcher::max_vector_size(BasicType)
#endif // COMPILER2

bool VectorSupport::is_vector(Klass* klass) {
  return klass->is_subclass_of(SystemDictionary::vector_VectorPayload_klass());
}

bool VectorSupport::is_vector_mask(Klass* klass) {
  return klass->is_subclass_of(SystemDictionary::vector_VectorMask_klass());
}

bool VectorSupport::is_vector_shuffle(Klass* klass) {
  return klass->is_subclass_of(SystemDictionary::vector_VectorShuffle_klass());
}

BasicType VectorSupport::klass2bt(InstanceKlass* ik) {
  assert(ik->is_subclass_of(SystemDictionary::vector_VectorPayload_klass()), "%s not a VectorPayload", ik->name()->as_C_string());
  fieldDescriptor fd; // find_field initializes fd if found
  // static final Class<?> ETYPE;
  Klass* holder = ik->find_field(vmSymbols::ETYPE_name(), vmSymbols::class_signature(), &fd);

  assert(holder != NULL, "sanity");
  assert(fd.is_static(), "");
  assert(fd.offset() > 0, "");

  if (is_vector_shuffle(ik)) {
    return T_BYTE;
  } else { // vector and mask
    oop value = ik->java_mirror()->obj_field(fd.offset());
    BasicType elem_bt = java_lang_Class::as_BasicType(value);
    return elem_bt;
  }
}

jint VectorSupport::klass2length(InstanceKlass* ik) {
  fieldDescriptor fd; // find_field initializes fd if found
  // static final int VLENGTH;
  Klass* holder = ik->find_field(vmSymbols::VLENGTH_name(), vmSymbols::int_signature(), &fd);

  assert(holder != NULL, "sanity");
  assert(fd.is_static(), "");
  assert(fd.offset() > 0, "");

  jint vlen = ik->java_mirror()->int_field(fd.offset());
  assert(vlen > 0, "");
  return vlen;
}

void VectorSupport::init_vector_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr) {
  int elem_size = type2aelembytes(elem_bt);
  for (int i = 0; i < num_elem; i++) {
    switch (elem_bt) {
      case T_BYTE: {
        jbyte elem_value = *(jbyte*) (value_addr + i * elem_size);
        arr->byte_at_put(i, elem_value);
        break;
      }
      case T_SHORT: {
        jshort elem_value = *(jshort*) (value_addr + i * elem_size);
        arr->short_at_put(i, elem_value);
        break;
      }
      case T_INT: {
        jint elem_value = *(jint*) (value_addr + i * elem_size);
        arr->int_at_put(i, elem_value);
        break;
      }
      case T_LONG: {
        jlong elem_value = *(jlong*) (value_addr + i * elem_size);
        arr->long_at_put(i, elem_value);
        break;
      }
      case T_FLOAT: {
        jfloat elem_value = *(jfloat*) (value_addr + i * elem_size);
        arr->float_at_put(i, elem_value);
        break;
      }
      case T_DOUBLE: {
        jdouble elem_value = *(jdouble*) (value_addr + i * elem_size);
        arr->double_at_put(i, elem_value);
        break;
      }
      default:
        fatal("unsupported: %s", type2name(elem_bt));
    }
  }
}

void VectorSupport::init_mask_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr) {
  int elem_size = type2aelembytes(elem_bt);

  for (int i = 0; i < num_elem; i++) {
    switch (elem_bt) {
      case T_BYTE: {
        jbyte elem_value = *(jbyte*) (value_addr + i * elem_size);
        arr->bool_at_put(i, elem_value != 0);
        break;
      }
      case T_SHORT: {
        jshort elem_value = *(jshort*) (value_addr + i * elem_size);
        arr->bool_at_put(i, elem_value != 0);
        break;
      }
      case T_INT:   // fall-through
      case T_FLOAT: {
        jint elem_value = *(jint*) (value_addr + i * elem_size);
        arr->bool_at_put(i, elem_value != 0);
        break;
      }
      case T_LONG:   // fall-through
      case T_DOUBLE: {
        jlong elem_value = *(jlong*) (value_addr + i * elem_size);
        arr->bool_at_put(i, elem_value != 0);
        break;
      }
      default:
        fatal("unsupported: %s", type2name(elem_bt));
    }
  }
}

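init_mask_array above fixes the boundary representation: a mask payload becomes a boolean[] in which any non-zero lane reads as true. A minimal plain-Java sketch of that convention (maskFromLanes is a hypothetical helper, not part of this change):

public class MaskLayoutSketch {
    // Mirrors init_mask_array: lane != 0 selects the lane.
    static boolean[] maskFromLanes(long[] lanes) {
        boolean[] mask = new boolean[lanes.length];
        for (int i = 0; i < lanes.length; i++) {
            mask[i] = lanes[i] != 0; // same test as elem_value != 0 above
        }
        return mask;
    }

    public static void main(String[] args) {
        boolean[] m = maskFromLanes(new long[] { -1, 0, 1 });
        System.out.println(m[0] + " " + m[1] + " " + m[2]); // true false true
    }
}
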
oop VectorSupport::allocate_vector_payload_helper(InstanceKlass* ik, BasicType elem_bt, int num_elem, address value_addr, TRAPS) {

  bool is_mask = is_vector_mask(ik);

  // On-heap vector values are represented as primitive arrays.
  TypeArrayKlass* tak = TypeArrayKlass::cast(Universe::typeArrayKlassObj(is_mask ? T_BOOLEAN : elem_bt));

  typeArrayOop arr = tak->allocate(num_elem, CHECK_NULL); // safepoint

  if (is_mask) {
    init_mask_array(arr, elem_bt, num_elem, value_addr);
  } else {
    init_vector_array(arr, elem_bt, num_elem, value_addr);
  }
  return arr;
}

oop VectorSupport::allocate_vector(InstanceKlass* ik, frame* fr, RegisterMap* reg_map, ObjectValue* ov, TRAPS) {
  assert(is_vector(ik), "%s not a vector", ik->name()->as_C_string());
  assert(ov->field_size() == 1, "%s not a vector", ik->name()->as_C_string());

  // Vector value in an aligned adjacent tuple (1, 2, 4, 8, or 16 slots).
  LocationValue* loc_value = ov->field_at(0)->as_LocationValue();

  BasicType elem_bt = klass2bt(ik);
  int num_elem = klass2length(ik);

  Handle vbox = ik->allocate_instance_handle(CHECK_NULL);

  Location loc = loc_value->location();

  oop payload = NULL;
  if (loc.type() == Location::vector) {
    address value_addr = loc.is_register()
        // Value was in a callee-save register
        ? reg_map->location(VMRegImpl::as_VMReg(loc.register_number()))
        // Else value was directly saved on the stack. The frame's original stack pointer,
        // before any extension by its callee (due to Compiler1 linkage on SPARC), must be used.
        : ((address)fr->unextended_sp()) + loc.stack_offset();
    payload = allocate_vector_payload_helper(ik, elem_bt, num_elem, value_addr, CHECK_NULL); // safepoint
  } else {
    // assert(false, "interesting");
    StackValue* value = StackValue::create_stack_value(fr, reg_map, loc_value);
    payload = value->get_obj()();
  }
  vector_VectorPayload::set_payload(vbox(), payload);
  return vbox();
}

#ifdef COMPILER2
int VectorSupport::vop2ideal(jint id, BasicType bt) {
  VectorOperation vop = (VectorOperation)id;
  switch (vop) {
    case VECTOR_OP_ADD: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_AddI;
        case T_LONG:   return Op_AddL;
        case T_FLOAT:  return Op_AddF;
        case T_DOUBLE: return Op_AddD;
        default: fatal("ADD: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_SUB: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_SubI;
        case T_LONG:   return Op_SubL;
        case T_FLOAT:  return Op_SubF;
        case T_DOUBLE: return Op_SubD;
        default: fatal("SUB: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_MUL: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_MulI;
        case T_LONG:   return Op_MulL;
        case T_FLOAT:  return Op_MulF;
        case T_DOUBLE: return Op_MulD;
        default: fatal("MUL: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_DIV: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_DivI;
        case T_LONG:   return Op_DivL;
        case T_FLOAT:  return Op_DivF;
        case T_DOUBLE: return Op_DivD;
        default: fatal("DIV: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_MIN: {
      switch (bt) {
        case T_BYTE:
        case T_SHORT:
        case T_INT:    return Op_MinI;
        case T_LONG:   return Op_MinL;
        case T_FLOAT:  return Op_MinF;
        case T_DOUBLE: return Op_MinD;
        default: fatal("MIN: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_MAX: {
      switch (bt) {
        case T_BYTE:
        case T_SHORT:
        case T_INT:    return Op_MaxI;
        case T_LONG:   return Op_MaxL;
        case T_FLOAT:  return Op_MaxF;
        case T_DOUBLE: return Op_MaxD;
        default: fatal("MAX: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_ABS: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_AbsI;
        case T_LONG:   return Op_AbsL;
        case T_FLOAT:  return Op_AbsF;
        case T_DOUBLE: return Op_AbsD;
        default: fatal("ABS: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_NEG: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_NegI;
        case T_FLOAT:  return Op_NegF;
        case T_DOUBLE: return Op_NegD;
        default: fatal("NEG: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_AND: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_AndI;
        case T_LONG:   return Op_AndL;
        default: fatal("AND: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_OR: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_OrI;
        case T_LONG:   return Op_OrL;
        default: fatal("OR: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_XOR: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_XorI;
        case T_LONG:   return Op_XorL;
        default: fatal("XOR: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_SQRT: {
      switch (bt) {
        case T_FLOAT:  return Op_SqrtF;
        case T_DOUBLE: return Op_SqrtD;
        default: fatal("SQRT: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_FMA: {
      switch (bt) {
        case T_FLOAT:  return Op_FmaF;
        case T_DOUBLE: return Op_FmaD;
        default: fatal("FMA: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_LSHIFT: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_LShiftI;
        case T_LONG:   return Op_LShiftL;
        default: fatal("LSHIFT: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_RSHIFT: {
      switch (bt) {
        case T_BYTE:   // fall-through
        case T_SHORT:  // fall-through
        case T_INT:    return Op_RShiftI;
        case T_LONG:   return Op_RShiftL;
        default: fatal("RSHIFT: %s", type2name(bt));
      }
      break;
    }
    case VECTOR_OP_URSHIFT: {
      switch (bt) {
        case T_BYTE:   return Op_URShiftB;
        case T_SHORT:  return Op_URShiftS;
        case T_INT:    return Op_URShiftI;
        case T_LONG:   return Op_URShiftL;
        default: fatal("URSHIFT: %s", type2name(bt));
      }
      break;
    }
    default: fatal("unknown op: %d", vop);
  }
  return 0; // Unimplemented
}
#endif // COMPILER2

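vop2ideal maps the VECTOR_OP_* ids passed down from Java (see jdk.internal.vm.vector.VectorSupport below) to one scalar ideal opcode per element type, with subword types sharing the int opcode. A rough Java-side mirror of the ADD row, for illustration only (idealOpForAdd is hypothetical):

public class Vop2IdealSketch {
    // One ideal opcode name per element type, as in vop2ideal's ADD case.
    static String idealOpForAdd(Class<?> elem) {
        if (elem == byte.class || elem == short.class || elem == int.class) return "AddI";
        if (elem == long.class)   return "AddL";
        if (elem == float.class)  return "AddF";
        if (elem == double.class) return "AddD";
        throw new IllegalArgumentException("ADD: " + elem);
    }

    public static void main(String[] args) {
        System.out.println(idealOpForAdd(short.class)); // AddI (subword ops widen to int)
    }
}
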
/**
 * Implementation of the jdk.internal.vm.vector.VectorSupport class
 */

JVM_ENTRY(jint, VectorSupport_GetMaxLaneCount(JNIEnv *env, jclass vsclazz, jobject clazz)) {
#ifdef COMPILER2
  oop mirror = JNIHandles::resolve_non_null(clazz);
  if (java_lang_Class::is_primitive(mirror)) {
    BasicType bt = java_lang_Class::primitive_type(mirror);
    return Matcher::max_vector_size(bt);
  }
#endif // COMPILER2
  return -1;
} JVM_END

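This entry backs the native getMaxLaneCount declared in the Java class below. A hedged caller-side sketch, noting that -1 signals a build without C2 or a non-primitive class (running it outside java.base would need --add-exports java.base/jdk.internal.vm.vector=ALL-UNNAMED):

public class LaneCountProbe {
    public static void main(String[] args) {
        // Non-positive results mean "no vectorizing JIT"; callers fall back to scalar code.
        int lanes = jdk.internal.vm.vector.VectorSupport.getMaxLaneCount(float.class);
        System.out.println(lanes > 0 ? lanes + " float lanes" : "no vector support");
    }
}
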
// JVM_RegisterVectorSupportMethods

#define LANG "Ljava/lang/"
#define CLS LANG "Class;"

#define CC (char*)  /*cast a literal from (const char*)*/
#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)

static JNINativeMethod jdk_internal_vm_vector_VectorSupport_methods[] = {
    {CC "getMaxLaneCount", CC "(" CLS ")I", FN_PTR(VectorSupport_GetMaxLaneCount)}
};

#undef CC
#undef FN_PTR

#undef LANG
#undef CLS

// This function is exported, used by NativeLookup.

JVM_ENTRY(void, JVM_RegisterVectorSupportMethods(JNIEnv* env, jclass vsclass)) {
  ThreadToNativeFromVM ttnfv(thread);

  int ok = env->RegisterNatives(vsclass, jdk_internal_vm_vector_VectorSupport_methods, sizeof(jdk_internal_vm_vector_VectorSupport_methods)/sizeof(JNINativeMethod));
  guarantee(ok == 0, "register jdk.internal.vm.vector.VectorSupport natives");
} JVM_END

src/hotspot/share/prims/vectorSupport.hpp (new file, 90 lines)
@ -0,0 +1,90 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_PRIMS_VECTORSUPPORT_HPP
#define SHARE_PRIMS_VECTORSUPPORT_HPP

#include "jni.h"
#include "code/debugInfo.hpp"
#include "memory/allocation.hpp"
#include "oops/typeArrayOop.inline.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/registerMap.hpp"
#include "utilities/exceptions.hpp"

extern "C" {
  void JNICALL JVM_RegisterVectorSupportMethods(JNIEnv* env, jclass vsclass);
}

class VectorSupport : AllStatic {
 private:
  static void init_mask_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr);
  static void init_vector_array(typeArrayOop arr, BasicType elem_bt, int num_elem, address value_addr);
  static oop  allocate_vector_payload_helper(InstanceKlass* ik, BasicType elem_bt, int num_elem, address value_addr, TRAPS);

  static BasicType klass2bt(InstanceKlass* ik);
  static jint      klass2length(InstanceKlass* ik);

 public:

  // Should be aligned with constants in jdk.internal.vm.vector.VectorSupport
  enum VectorOperation {
    // Unary
    VECTOR_OP_ABS  = 0,
    VECTOR_OP_NEG  = 1,
    VECTOR_OP_SQRT = 2,

    // Binary
    VECTOR_OP_ADD  = 4,
    VECTOR_OP_SUB  = 5,
    VECTOR_OP_MUL  = 6,
    VECTOR_OP_DIV  = 7,
    VECTOR_OP_MIN  = 8,
    VECTOR_OP_MAX  = 9,
    VECTOR_OP_AND  = 10,
    VECTOR_OP_OR   = 11,
    VECTOR_OP_XOR  = 12,

    // Ternary
    VECTOR_OP_FMA  = 13,

    // Broadcast int
    VECTOR_OP_LSHIFT  = 14,
    VECTOR_OP_RSHIFT  = 15,
    VECTOR_OP_URSHIFT = 16,

    // Convert
    VECTOR_OP_CAST        = 17,
    VECTOR_OP_REINTERPRET = 18
  };

  static int vop2ideal(jint vop, BasicType bt);

  static oop allocate_vector(InstanceKlass* holder, frame* fr, RegisterMap* reg_map, ObjectValue* sv, TRAPS);

  static bool is_vector(Klass* klass);
  static bool is_vector_mask(Klass* klass);
  static bool is_vector_shuffle(Klass* klass);
};
#endif // SHARE_PRIMS_VECTORSUPPORT_HPP

@ -4195,7 +4195,23 @@ jint Arguments::apply_ergo() {
  if (!UseBiasedLocking) {
    UseOptoBiasInlining = false;
  }
#endif

  if (!EnableVectorSupport) {
    if (!FLAG_IS_DEFAULT(EnableVectorReboxing) && EnableVectorReboxing) {
      warning("Disabling EnableVectorReboxing since EnableVectorSupport is turned off.");
    }
    FLAG_SET_DEFAULT(EnableVectorReboxing, false);

    if (!FLAG_IS_DEFAULT(EnableVectorAggressiveReboxing) && EnableVectorAggressiveReboxing) {
      if (!EnableVectorReboxing) {
        warning("Disabling EnableVectorAggressiveReboxing since EnableVectorReboxing is turned off.");
      } else {
        warning("Disabling EnableVectorAggressiveReboxing since EnableVectorSupport is turned off.");
      }
    }
    FLAG_SET_DEFAULT(EnableVectorAggressiveReboxing, false);
  }
#endif // COMPILER2

  if (FLAG_IS_CMDLINE(DiagnoseSyncOnPrimitiveWrappers)) {
    if (DiagnoseSyncOnPrimitiveWrappers == ObjectSynchronizer::LOG_WARNING && !log_is_enabled(Info, primitivewrappers)) {

@ -49,6 +49,7 @@
#include "oops/typeArrayOop.inline.hpp"
#include "oops/verifyOopClosure.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "prims/vectorSupport.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/atomic.hpp"
#include "runtime/biasedLocking.hpp"
@ -1015,7 +1016,15 @@ bool Deoptimization::realloc_objects(JavaThread* thread, frame* fr, RegisterMap*
#endif // INCLUDE_JVMCI || INCLUDE_AOT
      InstanceKlass* ik = InstanceKlass::cast(k);
      if (obj == NULL) {
#ifdef COMPILER2
        if (EnableVectorSupport && VectorSupport::is_vector(ik)) {
          obj = VectorSupport::allocate_vector(ik, fr, reg_map, sv, THREAD);
        } else {
          obj = ik->allocate_instance(THREAD);
        }
#else
        obj = ik->allocate_instance(THREAD);
#endif // COMPILER2
      }
    } else if (k->is_typeArray_klass()) {
      TypeArrayKlass* ak = TypeArrayKlass::cast(k);
@ -1352,6 +1361,11 @@ void Deoptimization::reassign_fields(frame* fr, RegisterMap* reg_map, GrowableAr
      continue;
    }
#endif // INCLUDE_JVMCI || INCLUDE_AOT
#ifdef COMPILER2
    if (EnableVectorSupport && VectorSupport::is_vector(k)) {
      continue; // skip field reassignment for vectors
    }
#endif
    if (k->is_instance_klass()) {
      InstanceKlass* ik = InstanceKlass::cast(k);
      reassign_fields_by_klass(ik, fr, reg_map, sv, 0, obj(), skip_internal);

@ -150,8 +150,12 @@ StackValue* StackValue::create_stack_value(const frame* fr, const RegisterMap* r
      value.ji = *(jint*)value_addr;
      return new StackValue(value.p);
    }
    case Location::invalid:
    case Location::invalid: {
      return new StackValue();
    }
    case Location::vector: {
      ShouldNotReachHere(); // should be handled by Deoptimization::realloc_objects()
    }
    default:
      ShouldNotReachHere();
  }
@ -222,7 +226,7 @@ void StackValue::print_on(outputStream* st) const {
        st->print("NULL");
      }
      st->print(" <" INTPTR_FORMAT ">", p2i(_handle_value()));
      break;
      break;

    case T_CONFLICT:
      st->print("conflict");

@ -1502,6 +1502,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(MaxNode, AddNode)                \
  declare_c2_type(MaxINode, MaxNode)               \
  declare_c2_type(MinINode, MaxNode)               \
  declare_c2_type(MaxLNode, MaxNode)               \
  declare_c2_type(MinLNode, MaxNode)               \
  declare_c2_type(MaxFNode, MaxNode)               \
  declare_c2_type(MinFNode, MaxNode)               \
  declare_c2_type(MaxDNode, MaxNode)               \
@ -1736,6 +1738,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(AbsDNode, AbsNode)               \
  declare_c2_type(CmpLTMaskNode, Node)             \
  declare_c2_type(NegNode, Node)                   \
  declare_c2_type(NegINode, NegNode)               \
  declare_c2_type(NegLNode, NegNode)               \
  declare_c2_type(NegFNode, NegNode)               \
  declare_c2_type(NegDNode, NegNode)               \
  declare_c2_type(AtanDNode, Node)                 \
@ -1745,10 +1749,12 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(ReverseBytesLNode, Node)         \
  declare_c2_type(ReductionNode, Node)             \
  declare_c2_type(VectorNode, Node)                \
  declare_c2_type(AbsVBNode, VectorNode)           \
  declare_c2_type(AbsVSNode, VectorNode)           \
  declare_c2_type(AbsVINode, VectorNode)           \
  declare_c2_type(AbsVLNode, VectorNode)           \
  declare_c2_type(AbsVFNode, VectorNode)           \
  declare_c2_type(AbsVDNode, VectorNode)           \
  declare_c2_type(AbsVBNode, VectorNode)           \
  declare_c2_type(AbsVSNode, VectorNode)           \
  declare_c2_type(AbsVINode, VectorNode)           \
  declare_c2_type(AbsVLNode, VectorNode)           \
  declare_c2_type(AddVBNode, VectorNode)           \
  declare_c2_type(AddVSNode, VectorNode)           \
  declare_c2_type(AddVINode, VectorNode)           \
@ -1774,6 +1780,7 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(MulVFNode, VectorNode)           \
  declare_c2_type(MulReductionVFNode, ReductionNode) \
  declare_c2_type(MulVDNode, VectorNode)           \
  declare_c2_type(NegVINode, VectorNode)           \
  declare_c2_type(NegVFNode, VectorNode)           \
  declare_c2_type(NegVDNode, VectorNode)           \
  declare_c2_type(FmaVDNode, VectorNode)           \
@ -1796,6 +1803,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(URShiftVSNode, VectorNode)       \
  declare_c2_type(URShiftVINode, VectorNode)       \
  declare_c2_type(URShiftVLNode, VectorNode)       \
  declare_c2_type(MinReductionVNode, ReductionNode) \
  declare_c2_type(MaxReductionVNode, ReductionNode) \
  declare_c2_type(AndVNode, VectorNode)            \
  declare_c2_type(AndReductionVNode, ReductionNode) \
  declare_c2_type(OrVNode, VectorNode)             \
@ -1804,8 +1813,6 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(XorReductionVNode, ReductionNode) \
  declare_c2_type(MaxVNode, VectorNode)            \
  declare_c2_type(MinVNode, VectorNode)            \
  declare_c2_type(MaxReductionVNode, ReductionNode) \
  declare_c2_type(MinReductionVNode, ReductionNode) \
  declare_c2_type(LoadVectorNode, LoadNode)        \
  declare_c2_type(StoreVectorNode, StoreNode)      \
  declare_c2_type(ReplicateBNode, VectorNode)      \
@ -1847,6 +1854,27 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
  declare_c2_type(CopySignFNode, Node)             \
  declare_c2_type(SignumDNode, Node)               \
  declare_c2_type(SignumFNode, Node)               \
  declare_c2_type(LoadVectorGatherNode, LoadVectorNode) \
  declare_c2_type(StoreVectorScatterNode, StoreVectorNode) \
  declare_c2_type(VectorLoadMaskNode, VectorNode)  \
  declare_c2_type(VectorLoadShuffleNode, VectorNode) \
  declare_c2_type(VectorStoreMaskNode, VectorNode) \
  declare_c2_type(VectorBlendNode, VectorNode)     \
  declare_c2_type(VectorRearrangeNode, VectorNode) \
  declare_c2_type(VectorMaskWrapperNode, VectorNode) \
  declare_c2_type(VectorMaskCmpNode, VectorNode)   \
  declare_c2_type(VectorCastB2XNode, VectorNode)   \
  declare_c2_type(VectorCastS2XNode, VectorNode)   \
  declare_c2_type(VectorCastI2XNode, VectorNode)   \
  declare_c2_type(VectorCastL2XNode, VectorNode)   \
  declare_c2_type(VectorCastF2XNode, VectorNode)   \
  declare_c2_type(VectorCastD2XNode, VectorNode)   \
  declare_c2_type(VectorInsertNode, VectorNode)    \
  declare_c2_type(VectorUnboxNode, VectorNode)     \
  declare_c2_type(VectorReinterpretNode, VectorNode) \
  declare_c2_type(VectorBoxNode, Node)             \
  declare_c2_type(VectorBoxAllocateNode, CallStaticJavaNode) \
  declare_c2_type(VectorTestNode, Node)            \
                                                   \
  /*********************/                          \
  /* Adapter Blob Entries */                       \

@ -237,6 +237,9 @@ inline size_t heap_word_size(size_t byte_size) {
  return (byte_size + (HeapWordSize-1)) >> LogHeapWordSize;
}

inline jfloat  jfloat_cast(jint x);
inline jdouble jdouble_cast(jlong x);

//-------------------------------------------
// Constant for jlong (standardized by C++11)

@ -247,6 +250,13 @@ inline size_t heap_word_size(size_t byte_size) {
const jlong min_jlong = CONST64(0x8000000000000000);
const jlong max_jlong = CONST64(0x7fffffffffffffff);

//-------------------------------------------
// Constant for jdouble
const jlong min_jlongDouble = CONST64(0x0000000000000001);
const jdouble min_jdouble = jdouble_cast(min_jlongDouble);
const jlong max_jlongDouble = CONST64(0x7fefffffffffffff);
const jdouble max_jdouble = jdouble_cast(max_jlongDouble);

const size_t K = 1024;
const size_t M = K*K;
const size_t G = M*K;
@ -469,6 +479,11 @@ const jshort max_jshort = (1 << 15) - 1; // largest jshort
const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint
const jint max_jint = (juint)min_jint - 1;                     // 0x7FFFFFFF == largest jint

const jint min_jintFloat = (jint)(0x00000001);
const jfloat min_jfloat = jfloat_cast(min_jintFloat);
const jint max_jintFloat = (jint)(0x7f7fffff);
const jfloat max_jfloat = jfloat_cast(max_jintFloat);

//----------------------------------------------------------------------------------------------------
// JVM spec restrictions

@ -673,6 +688,14 @@ inline bool is_reference_type(BasicType t) {
  return (t == T_OBJECT || t == T_ARRAY);
}

inline bool is_integral_type(BasicType t) {
  return is_subword_type(t) || t == T_INT || t == T_LONG;
}

inline bool is_floating_point_type(BasicType t) {
  return (t == T_FLOAT || t == T_DOUBLE);
}

extern char type2char_tab[T_CONFLICT+1]; // Map a BasicType to a jchar
inline char type2char(BasicType t) { return (uint)t < T_CONFLICT+1 ? type2char_tab[t] : 0; }
extern int type2size[T_CONFLICT+1]; // Map BasicType to result stack elements

@ -0,0 +1,468 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.internal.vm.vector;

import jdk.internal.vm.annotation.IntrinsicCandidate;
import jdk.internal.misc.Unsafe;
import jdk.internal.vm.annotation.ForceInline;

import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.function.*;

public class VectorSupport {
    static {
        registerNatives();
    }

    private static final Unsafe U = Unsafe.getUnsafe();

    // Unary
    public static final int VECTOR_OP_ABS  = 0;
    public static final int VECTOR_OP_NEG  = 1;
    public static final int VECTOR_OP_SQRT = 2;

    // Binary
    public static final int VECTOR_OP_ADD  = 4;
    public static final int VECTOR_OP_SUB  = 5;
    public static final int VECTOR_OP_MUL  = 6;
    public static final int VECTOR_OP_DIV  = 7;
    public static final int VECTOR_OP_MIN  = 8;
    public static final int VECTOR_OP_MAX  = 9;

    public static final int VECTOR_OP_AND  = 10;
    public static final int VECTOR_OP_OR   = 11;
    public static final int VECTOR_OP_XOR  = 12;

    // Ternary
    public static final int VECTOR_OP_FMA  = 13;

    // Broadcast int
    public static final int VECTOR_OP_LSHIFT  = 14;
    public static final int VECTOR_OP_RSHIFT  = 15;
    public static final int VECTOR_OP_URSHIFT = 16;

    public static final int VECTOR_OP_CAST        = 17;
    public static final int VECTOR_OP_REINTERPRET = 18;

    // enum BoolTest
    public static final int BT_eq = 0;
    public static final int BT_ne = 4;
    public static final int BT_le = 5;
    public static final int BT_ge = 7;
    public static final int BT_lt = 3;
    public static final int BT_gt = 1;
    public static final int BT_overflow = 2;
    public static final int BT_no_overflow = 6;

    // BasicType codes, for primitives only:
    public static final int
        T_FLOAT  = 6,
        T_DOUBLE = 7,
        T_BYTE   = 8,
        T_SHORT  = 9,
        T_INT    = 10,
        T_LONG   = 11;

    /* ============================================================================ */

    public static class VectorSpecies<E> {}

    public static class VectorPayload {
        private final Object payload; // array of primitives

        public VectorPayload(Object payload) {
            this.payload = payload;
        }

        protected final Object getPayload() {
            return VectorSupport.maybeRebox(this).payload;
        }
    }

    public static class Vector<E> extends VectorPayload {
        public Vector(Object payload) {
            super(payload);
        }
    }

    public static class VectorShuffle<E> extends VectorPayload {
        public VectorShuffle(Object payload) {
            super(payload);
        }
    }
    public static class VectorMask<E> extends VectorPayload {
        public VectorMask(Object payload) {
            super(payload);
        }
    }

    /* ============================================================================ */
    public interface BroadcastOperation<VM, E, S extends VectorSpecies<E>> {
        VM broadcast(long l, S s);
    }

    @IntrinsicCandidate
    public static
    <VM, E, S extends VectorSpecies<E>>
    VM broadcastCoerced(Class<? extends VM> vmClass, Class<E> E, int length,
                        long bits, S s,
                        BroadcastOperation<VM, E, S> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.broadcast(bits, s);
    }

    /* ============================================================================ */
    public interface ShuffleIotaOperation<E, S extends VectorSpecies<E>> {
        VectorShuffle<E> apply(int length, int start, int step, S s);
    }

    @IntrinsicCandidate
    public static
    <E, S extends VectorSpecies<E>>
    VectorShuffle<E> shuffleIota(Class<?> E, Class<?> ShuffleClass, S s, int length,
                                 int start, int step, int wrap, ShuffleIotaOperation<E, S> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(length, start, step, s);
    }

    public interface ShuffleToVectorOperation<VM, Sh, E> {
        VM apply(Sh s);
    }

    @IntrinsicCandidate
    public static
    <VM, Sh extends VectorShuffle<E>, E>
    VM shuffleToVector(Class<?> VM, Class<?> E, Class<?> ShuffleClass, Sh s, int length,
                       ShuffleToVectorOperation<VM, Sh, E> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(s);
    }

    /* ============================================================================ */
    public interface IndexOperation<V extends Vector<E>, E, S extends VectorSpecies<E>> {
        V index(V v, int step, S s);
    }

    //FIXME @IntrinsicCandidate
    public static
    <V extends Vector<E>, E, S extends VectorSpecies<E>>
    V indexVector(Class<? extends V> vClass, Class<E> E, int length,
                  V v, int step, S s,
                  IndexOperation<V, E, S> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.index(v, step, s);
    }

    /* ============================================================================ */

    @IntrinsicCandidate
    public static
    <V extends Vector<?>>
    long reductionCoerced(int oprId, Class<?> vectorClass, Class<?> elementType, int length,
                          V v,
                          Function<V, Long> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v);
    }

    /* ============================================================================ */

    public interface VecExtractOp<V> {
        long apply(V v1, int idx);
    }

    @IntrinsicCandidate
    public static
    <V extends Vector<?>>
    long extract(Class<?> vectorClass, Class<?> elementType, int vlen,
                 V vec, int ix,
                 VecExtractOp<V> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(vec, ix);
    }

    /* ============================================================================ */

    public interface VecInsertOp<V> {
        V apply(V v1, int idx, long val);
    }

    @IntrinsicCandidate
    public static
    <V extends Vector<?>>
    V insert(Class<? extends V> vectorClass, Class<?> elementType, int vlen,
             V vec, int ix, long val,
             VecInsertOp<V> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(vec, ix, val);
    }

    /* ============================================================================ */

    @IntrinsicCandidate
    public static
    <VM>
    VM unaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
               VM vm,
               Function<VM, VM> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(vm);
    }

    /* ============================================================================ */

    @IntrinsicCandidate
    public static
    <VM>
    VM binaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
                VM vm1, VM vm2,
                BiFunction<VM, VM, VM> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(vm1, vm2);
    }

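Every intrinsic in this class follows the same shape: C2 may replace the entire call, and when it does not, the trailing non-capturing defaultImpl runs instead. A self-contained sketch of that pattern (FallbackPatternDemo and its int[] "vectors" are illustrative, not part of this API):

import java.util.function.BiFunction;

public class FallbackPatternDemo {
    // Same shape as binaryOp above: the last parameter is the fallback the JIT may bypass.
    static <V> V binaryOp(V a, V b, BiFunction<V, V, V> defaultImpl) {
        return defaultImpl.apply(a, b); // interpreter/C1 path; an intrinsic would not get here
    }

    public static void main(String[] args) {
        int[] r = binaryOp(new int[] {1, 2}, new int[] {3, 4}, (x, y) -> {
            int[] out = new int[x.length];
            for (int i = 0; i < x.length; i++) out[i] = x[i] + y[i]; // element-wise add (VECTOR_OP_ADD)
            return out;
        });
        System.out.println(r[0] + ", " + r[1]); // prints: 4, 6
    }
}
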
/* ============================================================================ */
|
||||
|
||||
public interface TernaryOperation<V> {
|
||||
V apply(V v1, V v2, V v3);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<VM>
|
||||
VM ternaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
|
||||
VM vm1, VM vm2, VM vm3,
|
||||
TernaryOperation<VM> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.apply(vm1, vm2, vm3);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
// Memory operations
|
||||
|
||||
public interface LoadOperation<C, V, E, S extends VectorSpecies<E>> {
|
||||
V load(C container, int index, S s);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<C, VM, E, S extends VectorSpecies<E>>
|
||||
VM load(Class<? extends VM> vmClass, Class<E> E, int length,
|
||||
Object base, long offset, // Unsafe addressing
|
||||
C container, int index, S s, // Arguments for default implementation
|
||||
LoadOperation<C, VM, E, S> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.load(container, index, s);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
public interface LoadVectorOperationWithMap<C, V extends Vector<?>, E, S extends VectorSpecies<E>> {
|
||||
V loadWithMap(C container, int index, int[] indexMap, int indexM, S s);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<C, V extends Vector<?>, W extends Vector<Integer>, E, S extends VectorSpecies<E>>
|
||||
V loadWithMap(Class<?> vectorClass, Class<E> E, int length, Class<?> vectorIndexClass,
|
||||
Object base, long offset, // Unsafe addressing
|
||||
W index_vector,
|
||||
C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
|
||||
LoadVectorOperationWithMap<C, V, E, S> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
return defaultImpl.loadWithMap(container, index, indexMap, indexM, s);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
public interface StoreVectorOperation<C, V extends Vector<?>> {
|
||||
void store(C container, int index, V v);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<C, V extends Vector<?>>
|
||||
void store(Class<?> vectorClass, Class<?> elementType, int length,
|
||||
Object base, long offset, // Unsafe addressing
|
||||
V v,
|
||||
C container, int index, // Arguments for default implementation
|
||||
StoreVectorOperation<C, V> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
defaultImpl.store(container, index, v);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
public interface StoreVectorOperationWithMap<C, V extends Vector<?>> {
|
||||
void storeWithMap(C container, int index, V v, int[] indexMap, int indexM);
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
public static
|
||||
<C, V extends Vector<?>, W extends Vector<Integer>>
|
||||
void storeWithMap(Class<?> vectorClass, Class<?> elementType, int length, Class<?> vectorIndexClass,
|
||||
Object base, long offset, // Unsafe addressing
|
||||
W index_vector, V v,
|
||||
C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
|
||||
StoreVectorOperationWithMap<C, V> defaultImpl) {
|
||||
assert isNonCapturingLambda(defaultImpl) : defaultImpl;
|
||||
defaultImpl.storeWithMap(container, index, v, indexMap, indexM);
|
||||
}
|
||||
|
||||
/* ============================================================================ */
|
||||
|
||||
@IntrinsicCandidate
|
    public static
    <VM>
    boolean test(int cond, Class<?> vmClass, Class<?> elementType, int length,
                 VM vm1, VM vm2,
                 BiFunction<VM, VM, Boolean> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(vm1, vm2);
    }

    /* ============================================================================ */

    public interface VectorCompareOp<V,M> {
        M apply(int cond, V v1, V v2);
    }

    @IntrinsicCandidate
    public static <V extends Vector<E>,
                   M extends VectorMask<E>,
                   E>
    M compare(int cond, Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
              V v1, V v2,
              VectorCompareOp<V,M> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(cond, v1, v2);
    }

    /* ============================================================================ */

    public interface VectorRearrangeOp<V extends Vector<E>,
                                       Sh extends VectorShuffle<E>,
                                       E> {
        V apply(V v1, Sh shuffle);
    }

    @IntrinsicCandidate
    public static
    <V extends Vector<E>,
     Sh extends VectorShuffle<E>,
     E>
    V rearrangeOp(Class<? extends V> vectorClass, Class<Sh> shuffleClass, Class<?> elementType, int vlen,
                  V v1, Sh sh,
                  VectorRearrangeOp<V,Sh, E> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v1, sh);
    }

    /* ============================================================================ */

    public interface VectorBlendOp<V extends Vector<E>,
                                   M extends VectorMask<E>,
                                   E> {
        V apply(V v1, V v2, M mask);
    }

    @IntrinsicCandidate
    public static
    <V extends Vector<E>,
     M extends VectorMask<E>,
     E>
    V blend(Class<? extends V> vectorClass, Class<M> maskClass, Class<?> elementType, int length,
            V v1, V v2, M m,
            VectorBlendOp<V,M, E> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v1, v2, m);
    }

    /* ============================================================================ */

    public interface VectorBroadcastIntOp<V extends Vector<?>> {
        V apply(V v, int n);
    }

    @IntrinsicCandidate
    public static
    <V extends Vector<?>>
    V broadcastInt(int opr, Class<? extends V> vectorClass, Class<?> elementType, int length,
                   V v, int n,
                   VectorBroadcastIntOp<V> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v, n);
    }

    /* ============================================================================ */

    public interface VectorConvertOp<VOUT, VIN, S> {
        VOUT apply(VIN v, S species);
    }

    // Users of this intrinsic assume that it respects
    // REGISTER_ENDIAN, which is currently ByteOrder.LITTLE_ENDIAN.
    // See javadoc for REGISTER_ENDIAN.

    @IntrinsicCandidate
    public static <VOUT extends VectorPayload,
                   VIN extends VectorPayload,
                   S extends VectorSpecies<?>>
    VOUT convert(int oprId,
                 Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
                 Class<?> toVectorClass, Class<?> toElementType, int toVLen,
                 VIN v, S s,
                 VectorConvertOp<VOUT, VIN, S> defaultImpl) {
        assert isNonCapturingLambda(defaultImpl) : defaultImpl;
        return defaultImpl.apply(v, s);
    }

    /* ============================================================================ */

    @IntrinsicCandidate
    public static <V> V maybeRebox(V v) {
        // The fence is added here to avoid memory aliasing problems in C2 between scalar & vector accesses.
        // TODO: move the fence generation into C2. Generate only when reboxing is taking place.
        U.loadFence();
        return v;
    }

    /* ============================================================================ */

    // query the JVM's supported vector sizes and types
    public static native int getMaxLaneCount(Class<?> etype);

    /* ============================================================================ */

    public static boolean isNonCapturingLambda(Object o) {
        return o.getClass().getDeclaredFields().length == 0;
    }
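
The zero-field test relies on a detail of the lambda translation strategy: a capturing lambda is spun into a class that stores each captured value in a synthetic instance field, while a non-capturing lambda yields a fieldless class (under the default metafactory behavior). A quick illustration, with names of our own choosing:

int bound = 42;
java.util.function.IntUnaryOperator capturing = x -> x + bound;
java.util.function.IntUnaryOperator nonCapturing = x -> x + 1;
// capturing.getClass().getDeclaredFields().length > 0   (holds 'bound')
// nonCapturing.getClass().getDeclaredFields().length == 0

The assert guards the intrinsic contract: if a default implementation captured state, the JIT replacement could not safely ignore it.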

    /* ============================================================================ */

    private static native int registerNatives();
}

@ -138,9 +138,10 @@ module java.base {
        jdk.incubator.foreign;
    exports com.sun.security.ntlm to
        java.security.sasl;
    exports jdk.internal to
    exports jdk.internal to // for @HotSpotIntrinsicCandidate
        java.compiler,
        jdk.compiler,
        jdk.incubator.vector,
        jdk.jshell;
    exports jdk.internal.access to
        java.desktop,
@ -195,6 +196,7 @@ module java.base {
        jdk.attach,
        jdk.charsets,
        jdk.compiler,
        jdk.incubator.vector,
        jdk.jfr,
        jdk.jshell,
        jdk.nio.mapmode,
@ -228,9 +230,12 @@ module java.base {
        jdk.management.agent;
    exports jdk.internal.vm.annotation to
        jdk.internal.vm.ci,
        jdk.incubator.vector,
        jdk.incubator.foreign,
        jdk.jfr,
        jdk.unsupported;
    exports jdk.internal.vm.vector to
        jdk.incubator.vector;
    exports jdk.internal.util to
        jdk.incubator.foreign;
    exports jdk.internal.util.jar to

@ -0,0 +1,290 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import jdk.internal.vm.annotation.ForceInline;

import static jdk.incubator.vector.VectorOperators.*;

abstract class AbstractMask<E> extends VectorMask<E> {
    AbstractMask(boolean[] bits) {
        super(bits);
    }

    /*package-private*/
    abstract boolean[] getBits();

    // Unary operator

    interface MUnOp {
        boolean apply(int i, boolean a);
    }

    abstract AbstractMask<E> uOp(MUnOp f);

    // Binary operator

    interface MBinOp {
        boolean apply(int i, boolean a, boolean b);
    }

    abstract AbstractMask<E> bOp(VectorMask<E> o, MBinOp f);

    /*package-private*/
    abstract AbstractSpecies<E> vspecies();

    @Override
    @ForceInline
    public final VectorSpecies<E> vectorSpecies() {
        return vspecies();
    }

    @Override
    public boolean laneIsSet(int i) {
        return getBits()[i];
    }

    @Override
    public long toLong() {
        // FIXME: This should be an intrinsic.
        if (length() > Long.SIZE) {
            throw new UnsupportedOperationException("too many lanes for one long");
        }
        long res = 0;
        long set = 1;
        boolean[] bits = getBits();
        for (int i = 0; i < bits.length; i++) {
            res = bits[i] ? res | set : res;
            set = set << 1;
        }
        return res;
    }
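
Lane 0 lands in bit 0, so the returned long reads as a little-endian bitmap of the mask. A small usage sketch (the species and lane values are our example):

VectorMask<Integer> m =
    VectorMask.fromValues(IntVector.SPECIES_128, true, false, true, true);
long bits = m.toLong();   // bits 0, 2, 3 set => 0b1101 == 13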

    @Override
    public void intoArray(boolean[] bits, int i) {
        System.arraycopy(getBits(), 0, bits, i, length());
    }

    @Override
    public boolean[] toArray() {
        return getBits().clone();
    }

    @Override
    @ForceInline
    @SuppressWarnings("unchecked")
    public
    <F> VectorMask<F> check(Class<F> elementType) {
        if (vectorSpecies().elementType() != elementType) {
            throw AbstractSpecies.checkFailed(this, elementType);
        }
        return (VectorMask<F>) this;
    }

    @Override
    @ForceInline
    @SuppressWarnings("unchecked")
    public
    <F> VectorMask<F> check(VectorSpecies<F> species) {
        if (species != vectorSpecies()) {
            throw AbstractSpecies.checkFailed(this, species);
        }
        return (VectorMask<F>) this;
    }

    @Override
    public int trueCount() {
        //FIXME: use a population count intrinsic here
        int c = 0;
        for (boolean i : getBits()) {
            if (i) c++;
        }
        return c;
    }

    @Override
    public int firstTrue() {
        //FIXME: use a count trailing zeros intrinsic here
        boolean[] bits = getBits();
        for (int i = 0; i < bits.length; i++) {
            if (bits[i]) return i;
        }
        return bits.length;
    }

    @Override
    public int lastTrue() {
        //FIXME: use a count leading zeros intrinsic here
        boolean[] bits = getBits();
        for (int i = bits.length-1; i >= 0; i--) {
            if (bits[i]) return i;
        }
        return -1;
    }

    @Override
    public VectorMask<E> eq(VectorMask<E> m) {
        // FIXME: Generate good code here.
        return bOp(m, (i, a, b) -> a == b);
    }

    @Override
    public VectorMask<E> andNot(VectorMask<E> m) {
        // FIXME: Generate good code here.
        return bOp(m, (i, a, b) -> a && !b);
    }

    /*package-private*/
    static boolean anyTrueHelper(boolean[] bits) {
        // FIXME: Maybe use toLong() != 0 here.
        for (boolean i : bits) {
            if (i) return true;
        }
        return false;
    }

    /*package-private*/
    static boolean allTrueHelper(boolean[] bits) {
        // FIXME: Maybe use not().toLong() == 0 here.
        for (boolean i : bits) {
            if (!i) return false;
        }
        return true;
    }

    @Override
    @ForceInline
    public VectorMask<E> indexInRange(int offset, int limit) {
        int vlength = length();
        Vector<E> iota = vectorSpecies().zero().addIndex(1);
        VectorMask<E> badMask = checkIndex0(offset, limit, iota, vlength);
        return this.andNot(badMask);
    }
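
This is the method behind the standard masked-tail loop idiom. A sketch of the intended use from client code (the array a and factor are our placeholders):

var s = FloatVector.SPECIES_PREFERRED;
for (int i = 0; i < a.length; i += s.length()) {
    var m = s.indexInRange(i, a.length);       // all-true except past-the-end lanes
    FloatVector v = FloatVector.fromArray(s, a, i, m);
    v.mul(factor).intoArray(a, i, m);          // out-of-range lanes stay untouched
}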

    /*package-private*/
    @ForceInline
    AbstractVector<E>
    toVectorTemplate() {
        AbstractSpecies<E> vsp = vspecies();
        Vector<E> zero = vsp.broadcast(0);
        Vector<E> mone = vsp.broadcast(-1);
        // -1 will result in the most significant bit being set in
        // addition to some or all other lane bits.
        // For integral types, *all* lane bits will be set.
        // The bits for -1.0 are like {0b10111*0000*}.
        // FIXME: Use a conversion intrinsic for this operation.
        // https://bugs.openjdk.java.net/browse/JDK-8225740
        return (AbstractVector<E>) zero.blend(mone, this);
    }
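
The {0b10111*0000*} remark can be checked directly: the bit pattern of -1.0f sets the sign bit and most of the exponent, never the mantissa, so the most-significant-bit convention still holds for floating lanes. For example:

int bits = Float.floatToIntBits(-1.0f);
// Integer.toBinaryString(bits) is "10111111100000000000000000000000" (0xBF800000)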

    /**
     * Test if a masked memory access at a given offset into an array
     * of the given length will stay within the array.
     * The per-lane offsets are iota*esize.
     */
    /*package-private*/
    @ForceInline
    void checkIndexByLane(int offset, int alength,
                          Vector<E> iota,
                          int esize) {
        if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) {
            return;
        }
        // Although the specification is simple, the implementation is
        // tricky, because the value iota*esize might possibly
        // overflow. So we calculate our test values as scalars,
        // clipping to the range [-1..VLENGTH], and test them against
        // the unscaled iota vector, whose values are in [0..VLENGTH-1].
        int vlength = length();
        VectorMask<E> badMask;
        if (esize == 1) {
            badMask = checkIndex0(offset, alength, iota, vlength);
        } else if (offset >= 0) {
            // Masked access to multi-byte lanes in byte array.
            // It could be aligned anywhere.
            int elemCount = Math.min(vlength, (alength - offset) / esize);
            badMask = checkIndex0(0, elemCount, iota, vlength);
        } else {
            // This requires a split test.
            int clipOffset = Math.max(offset, -(vlength * esize));
            int elemCount = Math.min(vlength, (alength - clipOffset) / esize);
            badMask = checkIndex0(0, elemCount, iota, vlength);
            clipOffset &= (esize - 1); // power of two, so OK
            VectorMask<E> badMask2 = checkIndex0(clipOffset / esize, vlength,
                                                 iota, vlength);
            badMask = badMask.or(badMask2);
        }
        badMask = badMask.and(this);
        if (badMask.anyTrue()) {
            int badLane = badMask.firstTrue();
            throw ((AbstractMask<E>)badMask)
                   .checkIndexFailed(offset, badLane, alength, esize);
        }
    }

    private
    @ForceInline
    VectorMask<E> checkIndex0(int offset, int alength,
                              Vector<E> iota, int vlength) {
        // An active lane is bad if its number is greater than
        // alength-offset, since when added to offset it will step off
        // of the end of the array. To avoid overflow when
        // converting, clip the comparison value to [0..vlength]
        // inclusive.
        int indexLimit = Math.max(0, Math.min(alength - offset, vlength));
        VectorMask<E> badMask =
            iota.compare(GE, iota.broadcast(indexLimit));
        if (offset < 0) {
            // An active lane is bad if its number is less than
            // -offset, because when added to offset it will then
            // address an array element at a negative index. To avoid
            // overflow when converting, clip the comparison value at
            // vlength. This specific expression works correctly even
            // when offset is Integer.MIN_VALUE.
            int firstGoodIndex = -Math.max(offset, -vlength);
            VectorMask<E> badMask2 =
                iota.compare(LT, iota.broadcast(firstGoodIndex));
            if (indexLimit >= vlength) {
                badMask = badMask2; // 1st badMask is all true
            } else {
                badMask = badMask.or(badMask2);
            }
        }
        return badMask;
    }
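
Concrete numbers make the clipping visible. With offset=6, alength=10, vlength=8 (our example values):

int indexLimit = Math.max(0, Math.min(10 - 6, 8));  // == 4
// badMask marks lanes 4..7: lane 4 would touch a[6+4] == a[10],
// one past the end of a 10-element array; offset >= 0, so badMask2 is skipped.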

    private IndexOutOfBoundsException checkIndexFailed(int offset, int lane,
                                                       int alength, int esize) {
        String msg = String.format("Masked range check failed: "+
                                   "vector mask %s out of bounds at "+
                                   "index %d+%d in array of length %d",
                                   this, offset, lane * esize, alength);
        if (esize != 1) {
            msg += String.format(" (each lane spans %d array elements)", esize);
        }
        throw new IndexOutOfBoundsException(msg);
    }

}

@ -0,0 +1,246 @@
/*
 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.util.function.IntUnaryOperator;
import jdk.internal.vm.annotation.ForceInline;

abstract class AbstractShuffle<E> extends VectorShuffle<E> {
    static final IntUnaryOperator IDENTITY = i -> i;

    // Internal representation allows for a maximum index of 256
    // Values are clipped to [-VLENGTH..VLENGTH-1].

    AbstractShuffle(int length, byte[] reorder) {
        super(reorder);
        assert(length == reorder.length);
        assert(indexesInRange(reorder));
    }

    AbstractShuffle(int length, int[] reorder) {
        this(length, reorder, 0);
    }

    AbstractShuffle(int length, int[] reorder, int offset) {
        super(prepare(length, reorder, offset));
    }

    AbstractShuffle(int length, IntUnaryOperator f) {
        super(prepare(length, f));
    }

    private static byte[] prepare(int length, int[] reorder, int offset) {
        byte[] a = new byte[length];
        for (int i = 0; i < length; i++) {
            int si = reorder[offset + i];
            si = partiallyWrapIndex(si, length);
            a[i] = (byte) si;
        }
        return a;
    }

    private static byte[] prepare(int length, IntUnaryOperator f) {
        byte[] a = new byte[length];
        for (int i = 0; i < a.length; i++) {
            int si = f.applyAsInt(i);
            si = partiallyWrapIndex(si, length);
            a[i] = (byte) si;
        }
        return a;
    }

    byte[] reorder() {
        return (byte[])getPayload();
    }

    /*package-private*/
    abstract AbstractSpecies<E> vspecies();

    @Override
    @ForceInline
    public final VectorSpecies<E> vectorSpecies() {
        return vspecies();
    }

    @Override
    @ForceInline
    public void intoArray(int[] a, int offset) {
        byte[] reorder = reorder();
        int vlen = reorder.length;
        for (int i = 0; i < vlen; i++) {
            int sourceIndex = reorder[i];
            assert(sourceIndex >= -vlen && sourceIndex < vlen);
            a[offset + i] = sourceIndex;
        }
    }

    @Override
    @ForceInline
    public int[] toArray() {
        byte[] reorder = reorder();
        int[] a = new int[reorder.length];
        intoArray(a, 0);
        return a;
    }

    /*package-private*/
    @ForceInline
    final
    AbstractVector<E>
    toVectorTemplate() {
        // Note that the values produced by laneSource
        // are already clipped. At this point we convert
        // them from internal ints (or bytes) into the ETYPE.
        // FIXME: Use a conversion intrinsic for this operation.
        // https://bugs.openjdk.java.net/browse/JDK-8225740
        return (AbstractVector<E>) vspecies().fromIntValues(toArray());
    }

    @ForceInline
    public final VectorShuffle<E> checkIndexes() {
        // FIXME: vectorize this
        for (int index : reorder()) {
            if (index < 0) {
                throw checkIndexFailed(index, length());
            }
        }
        return this;
    }

    @ForceInline
    public final VectorShuffle<E> wrapIndexes() {
        // FIXME: vectorize this
        byte[] reorder = reorder();
        for (int index : reorder) {
            if (index < 0) {
                return wrapAndRebuild(reorder);
            }
        }
        return this;
    }

    @ForceInline
    public final VectorShuffle<E> wrapAndRebuild(byte[] oldReorder) {
        int length = oldReorder.length;
        byte[] reorder = new byte[length];
        for (int i = 0; i < length; i++) {
            int si = oldReorder[i];
            // FIXME: This does not work unless it's a power of 2.
            if ((length & (length - 1)) == 0) {
                si += si & length; // power-of-two optimization
            } else if (si < 0) {
                // non-POT code requires a conditional add
                si += length;
            }
            assert(si >= 0 && si < length);
            reorder[i] = (byte) si;
        }
        return vspecies().dummyVector().shuffleFromBytes(reorder);
    }
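
The power-of-two branch folds the conditional add into pure bit math. For VLENGTH 8 (our worked values):

int si = -3;
si += si & 8;        // -3 & 8 == 8, so si becomes 5 == (-3 mod 8)
int ok = 5;
ok += ok & 8;        // 5 & 8 == 0; a valid index passes through unchanged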

    @ForceInline
    public final VectorMask<E> laneIsValid() {
        // FIXME: vectorize this
        byte[] reorder = reorder();
        int length = reorder.length;
        boolean[] bits = new boolean[length];
        for (int i = 0; i < length; i++) {
            if (reorder[i] >= 0) {
                bits[i] = true;
            }
        }
        return vspecies().dummyVector().maskFromArray(bits);
    }

    @Override
    @ForceInline
    @SuppressWarnings("unchecked")
    public final
    <F> VectorShuffle<F> check(VectorSpecies<F> species) {
        if (species != vectorSpecies()) {
            throw AbstractSpecies.checkFailed(this, species);
        }
        return (VectorShuffle<F>) this;
    }

    @Override
    @ForceInline
    public final int checkIndex(int index) {
        return checkIndex0(index, length(), (byte)1);
    }

    @Override
    @ForceInline
    public final int wrapIndex(int index) {
        return checkIndex0(index, length(), (byte)0);
    }

    /** Return invalid indexes partially wrapped
     *  mod VLENGTH to negative values.
     */
    /*package-private*/
    @ForceInline
    static
    int partiallyWrapIndex(int index, int laneCount) {
        return checkIndex0(index, laneCount, (byte)-1);
    }

    /*package-private*/
    @ForceInline
    static int checkIndex0(int index, int laneCount, byte mode) {
        int wrapped = VectorIntrinsics.wrapToRange(index, laneCount);
        if (mode == 0 || wrapped == index) {
            return wrapped;
        }
        if (mode < 0) {
            return wrapped - laneCount; // special mode for internal storage
        }
        throw checkIndexFailed(index, laneCount);
    }
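
The three public entry points differ only in mode. For laneCount 8 (our worked values):

// checkIndex(9)              mode  1: wraps to 1 != 9, so it throws IndexOutOfBoundsException
// wrapIndex(9)               mode  0: returns the wrapped value 1
// partiallyWrapIndex(9, 8)   mode -1: wraps to 1, then returns 1 - 8 == -7 (internal form)
// any in-range index, e.g. 3, is returned unchanged by all three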

    private static IndexOutOfBoundsException checkIndexFailed(int index, int laneCount) {
        int max = laneCount - 1;
        String msg = "required an index in [0.."+max+"] but found "+index;
        return new IndexOutOfBoundsException(msg);
    }

    static boolean indexesInRange(byte[] reorder) {
        int length = reorder.length;
        for (byte si : reorder) {
            if (si >= length || si < -length) {
                boolean assertsEnabled = false;
                assert(assertsEnabled = true);
                if (assertsEnabled) {
                    String msg = ("index "+si+" out of range ["+length+"] in "+
                                  java.util.Arrays.toString(reorder));
                    throw new AssertionError(msg);
                }
                return false;
            }
        }
        return true;
    }
}

@ -0,0 +1,658 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.annotation.Stable;
import java.nio.ByteOrder;
import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.function.Function;
import java.util.function.IntUnaryOperator;

abstract class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.VectorSpecies<E>
                                  implements VectorSpecies<E> {
    @Stable
    final VectorShape vectorShape;
    @Stable
    final LaneType laneType;
    @Stable
    final int laneCount;
    @Stable
    final int laneCountLog2P1;
    @Stable
    final Class<? extends AbstractVector<E>> vectorType;
    @Stable
    final Class<? extends AbstractMask<E>> maskType;
    @Stable
    final Function<Object, ? extends AbstractVector<E>> vectorFactory;

    @Stable
    final VectorShape indexShape;
    @Stable
    final int maxScale, minScale;
    @Stable
    final int vectorBitSize, vectorByteSize;

    AbstractSpecies(VectorShape vectorShape,
                    LaneType laneType,
                    Class<? extends AbstractVector<E>> vectorType,
                    Class<? extends AbstractMask<E>> maskType,
                    Function<Object, ? extends AbstractVector<E>> vectorFactory) {
        this.vectorShape = vectorShape;
        this.laneType = laneType;
        this.vectorType = vectorType;
        this.maskType = maskType;
        this.vectorFactory = vectorFactory;

        // derived values:
        int bitSize = vectorShape.vectorBitSize();
        int byteSize = bitSize / Byte.SIZE;
        assert(byteSize * 8 == bitSize);
        this.vectorBitSize = bitSize;
        this.vectorByteSize = byteSize;
        int elementSize = laneType.elementSize;
        this.laneCount = bitSize / elementSize;
        assert(laneCount > 0); // could be 1 for mono-vector (double in v64)
        this.laneCountLog2P1 = Integer.numberOfTrailingZeros(laneCount) + 1;

        // Note: The shape might be the max-shape,
        // if there is no vector this large.
        int indexBitSize = Integer.SIZE * laneCount;
        this.indexShape = VectorShape.forIndexBitSize(indexBitSize, elementSize);

        // What are the largest and smallest scale factors that,
        // when multiplied times the elements in [0..VLENGTH],
        // inclusive, do not overflow the ETYPE?
        int precision = laneType.elementPrecision;
        if (precision >= Integer.SIZE) {
            // No overflow possible from int*int.
            this.maxScale = Integer.MAX_VALUE;
            this.minScale = Integer.MIN_VALUE;
        } else {
            boolean isfp = (laneType.elementKind == 'F');
            long x = laneCount;
            long maxScale = ((1L << precision)-(isfp?0:1)) / x;
            long minScale = (-1L << precision) / x;
            this.maxScale = (int) maxScale;
            this.minScale = (int) minScale;
        }
    }
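
A worked case for the scale bounds, assuming elementPrecision is 7 for byte lanes (the magnitude bits of a signed 8-bit value) and a 16-lane species:

long maxScale = ((1L << 7) - 1) / 16;   // 127 / 16 == 7;  7*16 == 112 still fits a byte
long minScale = (-1L << 7) / 16;        // -128 / 16 == -8; -8*16 == -128 fits exactly
// a requested scale of 8 would overflow: 8*16 == 128 > Byte.MAX_VALUE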

    @Stable //lazy JIT constant
    AbstractSpecies<Integer> indexSpecies;

    @Stable //lazy JIT constant
    AbstractShuffle<Byte> swapBytesShuffle;

    @Stable //lazy JIT constant
    AbstractVector<E> dummyVector;

    @Override
    @ForceInline
    public final int length() {
        return laneCount;
    }

    // Inside the implementation we use the more descriptive
    // term laneCount:

    /*package-private*/
    @ForceInline
    final int laneCount() {
        return laneCount;
    }

    /*package-private*/
    @ForceInline
    final int laneCountLog2() {
        return laneCountLog2P1 - 1; // subtract one from stable value
    }

    @Override
    @ForceInline
    @SuppressWarnings("unchecked")
    //NOT FINAL: SPECIALIZED
    public Class<E> elementType() {
        return (Class<E>) laneType.elementType;
    }

    // FIXME: appeal to general method (see https://bugs.openjdk.java.net/browse/JDK-6176992)
    // replace usages of this method and remove
    @ForceInline
    @SuppressWarnings("unchecked")
    //NOT FINAL: SPECIALIZED
    Class<E> genericElementType() {
        return (Class<E>) laneType.genericElementType;
    }

    @Override
    @ForceInline
    //NOT FINAL: SPECIALIZED
    public Class<? extends AbstractVector<E>> vectorType() {
        return vectorType;
    }

    @Override
    @ForceInline
    public final Class<? extends AbstractMask<E>> maskType() {
        return maskType;
    }

    @Override
    @ForceInline
    public final int elementSize() {
        return laneType.elementSize;
    }

    /*package-private*/
    @ForceInline
    final int elementByteSize() {
        return laneType.elementSize / Byte.SIZE;
    }

    @Override
    @ForceInline
    public final VectorShape vectorShape() {
        return vectorShape;
    }

    @ForceInline
    /*package-private*/
    final VectorShape indexShape() {
        return indexShape;
    }

    @Override
    @ForceInline
    public final int vectorBitSize() {
        return vectorBitSize;
    }

    @Override
    @ForceInline
    public final int vectorByteSize() {
        return vectorByteSize;
    }

    @Override
    @ForceInline
    public final int loopBound(int length) {
        return VectorIntrinsics.roundDown(length, laneCount);
    }

    @Override
    @ForceInline
    public final VectorMask<E> indexInRange(int offset, int limit) {
        return maskAll(true).indexInRange(offset, limit);
    }

    @Override
    @ForceInline
    public final <F> VectorSpecies<F> withLanes(Class<F> newType) {
        return withLanes(LaneType.of(newType)).check(newType);
    }

    @ForceInline
    /*package-private*/
    final
    AbstractSpecies<?> withLanes(LaneType newType) {
        if (newType == laneType) return this;
        return findSpecies(newType, vectorShape);
    }

    @ForceInline
    /*package-private*/
    AbstractSpecies<?> asIntegral() {
        return withLanes(laneType.asIntegral());
    }

    @ForceInline
    /*package-private*/
    AbstractSpecies<?> asFloating() {
        return withLanes(laneType.asFloating());
    }

    @Override
    @ForceInline
    @SuppressWarnings("unchecked")
    public final VectorSpecies<E> withShape(VectorShape newShape) {
        if (newShape == vectorShape) return this;
        return (VectorSpecies<E>) findSpecies(laneType, newShape);
    }

    @ForceInline
    /*package-private*/
    AbstractSpecies<Integer> indexSpecies() {
        // This JITs to a constant value:
        AbstractSpecies<Integer> sp = indexSpecies;
        if (sp != null) return sp;
        return indexSpecies = findSpecies(LaneType.INT, indexShape).check0(int.class);
    }

    @ForceInline
    /*package-private*/
    @SuppressWarnings("unchecked")
    AbstractSpecies<Byte> byteSpecies() {
        // This JITs to a constant value:
        return (AbstractSpecies<Byte>) withLanes(LaneType.BYTE);
    }

    @ForceInline
    /*package-private*/
    AbstractShuffle<Byte> swapBytesShuffle() {
        // This JITs to a constant value:
        AbstractShuffle<Byte> sh = swapBytesShuffle;
        if (sh != null) return sh;
        return swapBytesShuffle = makeSwapBytesShuffle();
    }

    private AbstractShuffle<Byte> makeSwapBytesShuffle() {
        int vbytes = vectorByteSize();
        int lbytes = elementByteSize();
        int[] sourceIndexes = new int[vbytes];
        for (int i = 0; i < vbytes; i++) {
            sourceIndexes[i] = i ^ (lbytes-1);
        }
        return (AbstractShuffle<Byte>)
            VectorShuffle.fromValues(byteSpecies(), sourceIndexes);
    }
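
The XOR against lbytes-1 reverses byte order within each lane without any division. For 4-byte lanes in an 8-byte vector (our sizes):

for (int i = 0; i < 8; i++) {
    System.out.print((i ^ 3) + " ");   // prints: 3 2 1 0 7 6 5 4
}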

    /*package-private*/
    abstract Vector<E> fromIntValues(int[] values);

    /**
     * Do not use a dummy except to call methods on it when you don't
     * care about the lane values. The main benefit of it is to
     * populate the type profile, which then allows the JIT to derive
     * constant values for dummy.species(), the current species, and
     * then for all of its attributes: ETYPE, VLENGTH, VSHAPE, etc.
     */
    @ForceInline
    /*package-private*/
    AbstractVector<E> dummyVector() {
        // This JITs to a constant value:
        AbstractVector<E> dummy = dummyVector;
        if (dummy != null) return dummy;
        // The rest of this computation is probably not JIT-ted.
        return makeDummyVector();
    }

    private AbstractVector<E> makeDummyVector() {
        Object za = Array.newInstance(elementType(), laneCount);
        return dummyVector = vectorFactory.apply(za);
        // This is the only use of vectorFactory.
        // All other factory requests are routed
        // through the dummy vector.
    }

    /**
     * Build a mask by directly calling its constructor.
     * It is an error if the array is aliased elsewhere.
     */
    @ForceInline
    /*package-private*/
    AbstractMask<E> maskFactory(boolean[] bits) {
        return dummyVector().maskFromArray(bits);
    }

    public final
    @Override
    @ForceInline
    VectorShuffle<E> shuffleFromArray(int[] sourceIndexes, int offset) {
        return dummyVector().shuffleFromArray(sourceIndexes, offset);
    }

    public final
    @Override
    @ForceInline
    VectorShuffle<E> shuffleFromValues(int... sourceIndexes) {
        return dummyVector().shuffleFromArray(sourceIndexes, 0);
    }

    public final
    @Override
    @ForceInline
    VectorShuffle<E> shuffleFromOp(IntUnaryOperator fn) {
        return dummyVector().shuffleFromOp(fn);
    }

    public final
    @Override
    @ForceInline
    VectorShuffle<E> iotaShuffle(int start, int step, boolean wrap) {
        if (start == 0 && step == 1)
            return dummyVector().iotaShuffle();
        else
            return dummyVector().iotaShuffle(start, step, wrap);
    }

    @ForceInline
    @Override
    public final Vector<E> fromByteArray(byte[] a, int offset, ByteOrder bo) {
        return dummyVector()
            .fromByteArray0(a, offset)
            .maybeSwap(bo);
    }

    @Override
    public VectorMask<E> loadMask(boolean[] bits, int offset) {
        return VectorMask.fromArray(this, bits, offset);
    }

    // Define zero and iota when we know the ETYPE and VSHAPE.
    public abstract AbstractVector<E> zero();
    /*package-private*/ abstract AbstractVector<E> iota();

    // Constructing vectors from raw bits.

    /*package-private*/
    abstract long longToElementBits(long e);

    /*package-private*/
    abstract AbstractVector<E> broadcastBits(long bits);

    /*package-private*/
    final IllegalArgumentException badElementBits(long iv, Object cv) {
        String msg = String.format("Vector creation failed: "+
                                   "value %s cannot be represented in ETYPE %s"+
                                   "; result of cast is %s",
                                   iv,
                                   elementType(),
                                   cv);
        return new IllegalArgumentException(msg);
    }

    /*package-private*/
    static
    final IllegalArgumentException badArrayBits(Object iv,
                                                boolean isInt,
                                                long cv) {
        String msg = String.format("Array creation failed: "+
                                   "lane value %s cannot be represented in %s"+
                                   "; result of cast is %s",
                                   iv,
                                   (isInt ? "int" : "long"),
                                   cv);
        return new IllegalArgumentException(msg);
    }

    /*package-private*/
    Object iotaArray() {
        // Create an iota array. It's OK if this is really slow,
        // because it happens only once per species.
        Object ia = Array.newInstance(laneType.elementType,
                                      laneCount);
        assert(ia.getClass() == laneType.arrayType);
        checkValue(laneCount-1); // worst case
        for (int i = 0; i < laneCount; i++) {
            if ((byte)i == i)
                Array.setByte(ia, i, (byte)i);
            else if ((short)i == i)
                Array.setShort(ia, i, (short)i);
            else
                Array.setInt(ia, i, i);
            assert(Array.getDouble(ia, i) == i);
        }
        return ia;
    }

    @ForceInline
    /*package-private*/
    void checkScale(int scale) {
        if (scale > 0) {
            if (scale <= maxScale) return;
        } else { // scale <= 0
            if (scale >= minScale) return;
        }
        throw checkScaleFailed(scale);
    }

    private IllegalArgumentException checkScaleFailed(int scale) {
        String msg = String.format("%s: cannot represent VLENGTH*%d",
                                   this, scale);
        return new IllegalArgumentException(msg);
    }

    /*package-private*/
    interface RVOp {
        long apply(int i); // supply raw element bits
    }

    /*package-private*/
    abstract AbstractVector<E> rvOp(RVOp f);

    /*package-private*/
    interface FOpm {
        boolean apply(int i);
    }

    AbstractMask<E> opm(FOpm f) {
        boolean[] res = new boolean[laneCount];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return dummyVector().maskFromArray(res);
    }

    @Override
    @ForceInline
    public final
    <F> VectorSpecies<F> check(Class<F> elementType) {
        return check0(elementType);
    }

    @ForceInline
    @SuppressWarnings("unchecked")
    /*package-private*/ final
    <F> AbstractSpecies<F> check0(Class<F> elementType) {
        if (elementType != this.elementType()) {
            throw AbstractSpecies.checkFailed(this, elementType);
        }
        return (AbstractSpecies<F>) this;
    }

    @ForceInline
    /*package-private*/
    AbstractSpecies<E> check(LaneType laneType) {
        if (laneType != this.laneType) {
            throw AbstractSpecies.checkFailed(this, laneType);
        }
        return this;
    }

    @Override
    @ForceInline
    public int partLimit(VectorSpecies<?> toSpecies, boolean lanewise) {
        AbstractSpecies<?> rsp = (AbstractSpecies<?>) toSpecies;
        int inSizeLog2 = this.vectorShape.vectorBitSizeLog2;
        int outSizeLog2 = rsp.vectorShape.vectorBitSizeLog2;
        if (lanewise) {
            inSizeLog2 += (rsp.laneType.elementSizeLog2 -
                           this.laneType.elementSizeLog2);
        }
        int diff = (inSizeLog2 - outSizeLog2);
        // Let's try a branch-free version of this.
        int sign = (diff >> -1);
        //d = Math.abs(diff);
        //d = (sign == 0 ? diff : 1 + ~diff);
        int d = (diff ^ sign) - sign;
        // Compute sgn(diff) << abs(diff), but replace 1 by 0.
        return ((sign | 1) << d) & ~1;
    }
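
Tracing the branch-free arithmetic with diff = -2, a contraction by 4 (our example):

int diff = -2;
int sign = diff >> -1;                // shift count masks to 31, so sign == -1
int d = (diff ^ sign) - sign;         // two's-complement negate: d == 2
int limit = ((sign | 1) << d) & ~1;   // (-1 << 2) & ~1 == -4
// partLimit -4 advertises the four part numbers 0, -1, -2, -3;
// diff == 0 gives (1 << 0) & ~1 == 0, the promised "replace 1 by 0"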

    /**
     * Helper for throwing CheckCastExceptions,
     * used by the various Vector*.check(*) methods.
     */
    /*package-private*/
    static ClassCastException checkFailed(Object what, Object required) {
        // Find a species for the thing that's failing.
        AbstractSpecies<?> whatSpecies = null;
        String where;
        if (what instanceof VectorSpecies) {
            whatSpecies = (AbstractSpecies<?>) what;
            where = whatSpecies.toString();
        } else if (what instanceof Vector) {
            whatSpecies = (AbstractSpecies<?>) ((Vector<?>) what).species();
            where = "a Vector<"+whatSpecies.genericElementType()+">";
        } else if (what instanceof VectorMask) {
            whatSpecies = (AbstractSpecies<?>) ((VectorMask<?>) what).vectorSpecies();
            where = "a VectorMask<"+whatSpecies.genericElementType()+">";
        } else if (what instanceof VectorShuffle) {
            whatSpecies = (AbstractSpecies<?>) ((VectorShuffle<?>) what).vectorSpecies();
            where = "a VectorShuffle<"+whatSpecies.genericElementType()+">";
        } else {
            where = what.toString();
        }

        Object found = null;
        if (whatSpecies != null) {
            if (required instanceof VectorSpecies) {
                // required is a VectorSpecies; found the wrong species
                found = whatSpecies;
            } else if (required instanceof Vector) {
                // same VectorSpecies required; found the wrong species
                found = whatSpecies;
                required = ((Vector<?>)required).species();
            } else if (required instanceof Class) {
                // required is a Class; found the wrong ETYPE
                Class<?> requiredClass = (Class<?>) required;
                LaneType requiredType = LaneType.forClassOrNull(requiredClass);
                found = whatSpecies.elementType();
                if (requiredType == null) {
                    required = required + " (not a valid lane type)";
                } else if (!requiredClass.isPrimitive()) {
                    required = required + " (should be " + requiredType + ")";
                }
            } else if (required instanceof LaneType) {
                // required is a LaneType; found the wrong ETYPE
                required = ((LaneType) required).elementType;
                found = whatSpecies.elementType();
            } else if (required instanceof Integer) {
                // required is a length; species has wrong VLENGTH
                required = required + " lanes";
                found = whatSpecies.length();
            }
        }
        if (found == null) found = "bad value";

        String msg = where+": required "+required+" but found "+found;
        return new ClassCastException(msg);
    }

    private static final @Stable AbstractSpecies<?>[][] CACHES
        = new AbstractSpecies<?>[LaneType.SK_LIMIT][VectorShape.SK_LIMIT];

    // Helper functions for finding species:

    /*package-private*/
    @ForceInline
    static <E>
    AbstractSpecies<E> findSpecies(Class<E> elementType,
                                   LaneType laneType,
                                   VectorShape shape) {
        assert(elementType == laneType.elementType);
        return findSpecies(laneType, shape).check0(elementType);
    }

    /*package-private*/
    @ForceInline
    static
    AbstractSpecies<?> findSpecies(LaneType laneType,
                                   VectorShape shape) {
        // The JIT can see into this cache.
        // Therefore it is useful to arrange for constant
        // arguments to this method. If the cache
        // is full when the JIT runs, the cache item becomes
        // a compile-time constant. And then all the @Stable
        // fields of the AbstractSpecies are also constants.
        AbstractSpecies<?> s = CACHES[laneType.switchKey][shape.switchKey];
        if (s != null) return s;
        return computeSpecies(laneType, shape);
    }

    private static
    AbstractSpecies<?> computeSpecies(LaneType laneType,
                                      VectorShape shape) {
        AbstractSpecies<?> s = null;
        // enum-switches don't optimize properly JDK-8161245
        switch (laneType.switchKey) {
        case LaneType.SK_FLOAT:
            s = FloatVector.species(shape); break;
        case LaneType.SK_DOUBLE:
            s = DoubleVector.species(shape); break;
        case LaneType.SK_BYTE:
            s = ByteVector.species(shape); break;
        case LaneType.SK_SHORT:
            s = ShortVector.species(shape); break;
        case LaneType.SK_INT:
            s = IntVector.species(shape); break;
        case LaneType.SK_LONG:
            s = LongVector.species(shape); break;
        }
        if (s == null) {
            // NOTE: The result of this method is guaranteed to be
            // non-null. Later calls to ".check" also ensure this.
            // If this method hits a NPE, it is because a helper
            // method EVector.species() has returned a null value, and
            // that is because a SPECIES_X static constant has not yet
            // been initialized. And that, in turn, is because
            // somebody is calling this method way too early during
            // bootstrapping.
            throw new AssertionError("bootstrap problem");
        }
        assert(s.laneType == laneType) : s + "!=" + laneType;
        assert(s.vectorShape == shape) : s + "!=" + shape;
        CACHES[laneType.switchKey][shape.switchKey] = s;
        return s;
    }

    @Override
    public final String toString() {
        return "Species["+laneType+", "+laneCount+", "+vectorShape+"]";
    }

    @Override
    public final boolean equals(Object obj) {
        if (obj instanceof AbstractSpecies) {
            AbstractSpecies<?> that = (AbstractSpecies<?>) obj;
            return (this.laneType == that.laneType &&
                    this.laneCount == that.laneCount &&
                    this.vectorShape == that.vectorShape);
        }
        return this == obj;
    }

    /**
     * Returns a hash code value for the species,
     * based on its lane type, lane count, and vector shape.
     *
     * @return a hash code value for this species
     */
    @Override
    public final int hashCode() {
        int[] a = { laneType.ordinal(), laneCount, vectorShape.ordinal() };
        return Arrays.hashCode(a);
    }
}

@ -0,0 +1,726 @@
/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.function.IntUnaryOperator;

import static jdk.incubator.vector.VectorOperators.*;

@SuppressWarnings("cast")
abstract class AbstractVector<E> extends Vector<E> {
    /**
     * The order of vector bytes when stored in natural
     * array elements of the same lane type.
     * This is also the behavior of the
     * VectorSupport load/store instructions.
     * If these instructions gain the capability to do
     * byte swapping on the fly, add a bit to those
     * instructions, but let this polarity be the
     * "neutral" or "default" setting of the bit.
     */
    /*package-private*/
    static final ByteOrder NATIVE_ENDIAN = ByteOrder.nativeOrder();

    /**
     * The order of vector bytes as stored in the register
     * file. This becomes visible with the asRaw[Type]Vector
     * operations, which convert between the internal byte-wise
     * representation and the typed lane-wise representation.
     * It is very possible for a platform to have big-endian
     * memory layout and little-endian register layout,
     * so this is a different setting from NATIVE_ENDIAN.
     * In fact, both Intel and ARM use LE conventions here.
     * Future work may be needed for resolutely BE platforms.
     */
    /*package-private*/
    static final ByteOrder REGISTER_ENDIAN = ByteOrder.LITTLE_ENDIAN;

    /*package-private*/
    AbstractVector(Object bits) {
        super(bits);
    }

    // Extractors

    /*package-private*/
    abstract AbstractSpecies<E> vspecies();

    @Override
    @ForceInline
    public final VectorSpecies<E> species() {
        return vspecies();
    }

    // Something to make types match up better:

    @Override
    @ForceInline
    public final
    <F> Vector<F> check(VectorSpecies<F> species) {
        return check0(species);
    }

    @ForceInline
    @SuppressWarnings("unchecked")
    /*package-private*/ final
    <F> AbstractVector<F> check0(VectorSpecies<F> species) {
        if (!sameSpecies(species)) {
            throw AbstractSpecies.checkFailed(this, species);
        }
        return (AbstractVector<F>) this;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    <F> Vector<F> check(Class<F> elementType) {
        return check0(elementType);
    }

    @ForceInline
    @SuppressWarnings("unchecked")
    /*package-private*/ final
    <F> AbstractVector<F> check0(Class<F> elementType) {
        if (this.elementType() != elementType) {
            throw AbstractSpecies.checkFailed(this, elementType);
        }
        return (AbstractVector<F>) this;
    }

    @ForceInline
    @SuppressWarnings("unchecked")
    /*package-private*/ final
    <F> AbstractVector<F> check(Vector<F> other) {
        if (!sameSpecies(other)) {
            throw AbstractSpecies.checkFailed(this, other);
        }
        return (AbstractVector<F>) this;
    }

    @ForceInline
    private boolean sameSpecies(Vector<?> other) {
        // It's simpler and faster to do a class check.
        boolean same = (this.getClass() == other.getClass());
        // Make sure it works, too!
        assert(same == (this.species() == other.species())) : same;
        return same;
    }

    @ForceInline
    private boolean sameSpecies(VectorSpecies<?> species) {
        // It's simpler and faster to do a class check,
        // even if you have to load a dummy vector.
        AbstractVector<?> other = ((AbstractSpecies<?>)species).dummyVector();
        boolean same = (this.getClass() == other.getClass());
        // Make sure it works, too!
        assert(same == (this.species() == species)) : same;
        return same;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final VectorMask<E> maskAll(boolean bit) {
        return species().maskAll(bit);
    }

    // Make myself into a vector of the same shape
    // and same information content but different lane type
    /*package-private*/
    abstract AbstractVector<?> asVectorRaw(LaneType laneType);

    // Make myself into a byte vector of the same shape
    /*package-private*/
    abstract ByteVector asByteVectorRaw();

    /*package-private*/
    @ForceInline
    final AbstractVector<?> asVectorRawTemplate(LaneType laneType) {
        // NOTE: This assumes that convert0('X')
        // respects REGISTER_ENDIAN order.
        return convert0('X', vspecies().withLanes(laneType));
    }

    /*package-private*/
    @ForceInline
    ByteVector asByteVectorRawTemplate() {
        return (ByteVector) asVectorRawTemplate(LaneType.BYTE);
    }

    abstract AbstractMask<E> maskFromArray(boolean[] bits);

    abstract AbstractShuffle<E> iotaShuffle();

    abstract AbstractShuffle<E> iotaShuffle(int start, int step, boolean wrap);

    /*do not alias this byte array*/
    abstract AbstractShuffle<E> shuffleFromBytes(byte[] reorder);

    abstract AbstractShuffle<E> shuffleFromArray(int[] indexes, int i);

    abstract AbstractShuffle<E> shuffleFromOp(IntUnaryOperator fn);

    /*package-private*/
    abstract AbstractVector<E> fromByteArray0(byte[] a, int offset);

    /*package-private*/
    abstract AbstractVector<E> maybeSwap(ByteOrder bo);

    /*package-private*/
    @ForceInline
    VectorShuffle<Byte> swapBytesShuffle() {
        return vspecies().swapBytesShuffle();
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public ShortVector reinterpretAsShorts() {
        return (ShortVector) asVectorRaw(LaneType.SHORT);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public IntVector reinterpretAsInts() {
        return (IntVector) asVectorRaw(LaneType.INT);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public LongVector reinterpretAsLongs() {
        return (LongVector) asVectorRaw(LaneType.LONG);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public FloatVector reinterpretAsFloats() {
        return (FloatVector) asVectorRaw(LaneType.FLOAT);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public DoubleVector reinterpretAsDoubles() {
        return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final <F>
    Vector<F> convert(Conversion<E,F> conv, int part) {
        // Shape invariance is simple to implement.
        // It's part of the API because shape invariance
        // is the default mode of operation, and shape
        // shifting operations must advertise themselves.
        ConversionImpl<E,F> c = (ConversionImpl<E,F>) conv;
        @SuppressWarnings("unchecked")
        VectorSpecies<F> rsp = (VectorSpecies<F>)
            vspecies().withLanes(c.range());
        return convertShape(conv, rsp, part);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final <F>
    Vector<F> castShape(VectorSpecies<F> toSpecies, int part) {
        // This is an odd mix of shape conversion plus
        // lanewise conversions. It seems to be useful
        // sometimes as a shorthand, though maybe we
        // can drop it.
        AbstractSpecies<E> vsp = vspecies();
        AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
        @SuppressWarnings("unchecked")
        ConversionImpl<E,F> c = (ConversionImpl<E,F>)
            ConversionImpl.ofCast(vsp.laneType, rsp.laneType);
        return convertShape(c, rsp, part);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public abstract <F>
    Vector<F> convertShape(Conversion<E,F> conv, VectorSpecies<F> rsp, int part);

    /**
     * This is the template for Vector::reinterpretShape, to be
     * specialized by each distinct vector class.
     */
    /*package-private*/
    @ForceInline
    final <F>
    AbstractVector<F> reinterpretShapeTemplate(VectorSpecies<F> toSpecies, int part) {
        AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
        AbstractSpecies<E> vsp = vspecies();
        if (part == 0) {
            // Works the same for in-place, expand, or contract.
            return convert0('X', rsp);
        } else {
            int origin = shapeChangeOrigin(vsp, rsp, false, part);
            //System.out.println("*** origin = "+origin+", part = "+part+", reinterpret");
            if (part > 0) { // Expansion: slice first then cast.
                return slice(origin, vsp.zero()).convert0('X', rsp);
            } else { // Contraction: cast first then unslice.
                return rsp.zero().slice(rsp.laneCount() - origin,
                                        convert0('X', rsp));
            }
        }
    }

    @Override
    public abstract AbstractVector<E> slice(int origin, Vector<E> v1);

    /**
     * This is the template for Vector::convertShape, to be
     * specialized by each distinct vector class.
     */
    /*package-private*/
    @ForceInline
    final <F>
    AbstractVector<F> convertShapeTemplate(Conversion<E,F> conv, VectorSpecies<F> toSpecies, int part) {
        ConversionImpl<E,F> c = (ConversionImpl<E,F>) conv;
        AbstractSpecies<F> rsp = (AbstractSpecies<F>) toSpecies;
        AbstractSpecies<E> vsp = vspecies();
        char kind = c.kind();
        switch (kind) {
        case 'C': // Regular cast conversion, known to the JIT.
            break;
        case 'I': // Identity conversion => reinterpret.
            assert(c.sizeChangeLog2() == 0);
            kind = 'X';
            break;
        case 'Z': // Lane-wise expansion with zero padding.
            assert(c.sizeChangeLog2() > 0);
            assert(c.range().elementKind == 'I');
            break;
        case 'R': // Lane-wise reinterpret conversion.
            if (c.sizeChangeLog2() != 0) {
                kind = 'Z'; // some goofy stuff here
                break;
            }
            kind = 'X'; // No size change => reinterpret whole vector
            break;
        default:
            throw new AssertionError(c);
        }
        vsp.check(c.domain()); // apply dynamic check to conv
        rsp.check(c.range()); // apply dynamic check to conv
        if (part == 0) {
            // Works the same for in-place, expand, or contract.
            return convert0(kind, rsp);
        } else {
            int origin = shapeChangeOrigin(vsp, rsp, true, part);
            //System.out.println("*** origin = "+origin+", part = "+part+", lanewise");
            if (part > 0) { // Expansion: slice first then cast.
                return slice(origin, vsp.zero()).convert0(kind, rsp);
            } else { // Contraction: cast first then unslice.
                return rsp.zero().slice(rsp.laneCount() - origin,
                                        convert0(kind, rsp));
            }
        }
    }

    /**
     * Check a part number and return it multiplied by the appropriate
     * block factor to yield the origin of the operand block, as a
     * lane number. For expansions the origin is reckoned in the
     * domain vector, since the domain vector has too much information
     * and must be sliced. For contractions the origin is reckoned in
     * the range vector, since the range vector has too many lanes and
     * the result must be unsliced at the same position as the inverse
     * expansion. If the conversion is lanewise, then lane sizes may
     * be changing as well. This affects the logical size of the
     * result, and so the domain size is multiplied or divided by the
     * lane size change.
     */
    /*package-private*/
    @ForceInline
    static
    int shapeChangeOrigin(AbstractSpecies<?> dsp,
                          AbstractSpecies<?> rsp,
                          boolean lanewise,
                          int part) {
        int domSizeLog2 = dsp.vectorShape.vectorBitSizeLog2;
        int phySizeLog2 = rsp.vectorShape.vectorBitSizeLog2;
        int laneChangeLog2 = 0;
        if (lanewise) {
            laneChangeLog2 = (rsp.laneType.elementSizeLog2 -
                              dsp.laneType.elementSizeLog2);
        }
        int resSizeLog2 = domSizeLog2 + laneChangeLog2;
        // resSizeLog2 = 0 => 1-lane vector shrinking to 1-byte lane-size
        // resSizeLog2 < 0 => small vector shrinking by more than a lane-size
        assert(resSizeLog2 >= 0);
        // Expansion ratio: expansionLog2 = resSizeLog2 - phySizeLog2;
        if (!partInRange(resSizeLog2, phySizeLog2, part)) {
            // fall through...
        } else if (resSizeLog2 > phySizeLog2) {
            // Expansion by M means we must slice a block from the domain.
            // What is that block size? It is 1/M of the domain.
            // Let's compute the log2 of that block size, as 's'.
            //s = (dsp.laneCountLog2() - expansionLog2);
            //s = ((domSizeLog2 - dsp.laneType.elementSizeLog2) - expansionLog2);
            //s = (domSizeLog2 - expansionLog2 - dsp.laneType.elementSizeLog2);
            int s = phySizeLog2 - laneChangeLog2 - dsp.laneType.elementSizeLog2;
            // Scale the part number by the input block size, in input lanes.
            if ((s & 31) == s) // sanity check
                return part << s;
        } else {
            // Contraction by M means we must drop a block into the range.
            // What is that block size? It is 1/M of the range.
            // Let's compute the log2 of that block size, as 's'.
            //s = (rsp.laneCountLog2() + expansionLog2);
            //s = ((phySizeLog2 - rsp.laneType.elementSizeLog2) + expansionLog2);
            //s = (phySizeLog2 + expansionLog2 - rsp.laneType.elementSizeLog2);
            int s = resSizeLog2 - rsp.laneType.elementSizeLog2;
            // Scale the part number by the output block size, in output lanes.
            if ((s & 31) == s) // sanity check
                return -part << s;
        }
        throw wrongPart(dsp, rsp, lanewise, part);
    }
|
||||
|
||||
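    // Worked example (added for illustration): for a lane-wise B2S
    // conversion from byte128 to short128, domSizeLog2 = 7 and
    // laneChangeLog2 = +1, so resSizeLog2 = 8 against phySizeLog2 = 7:
    // a 2-way expansion. The block size is s = 7 - 1 - 3 = 3, i.e. 8
    // input byte lanes, so part 1 selects the block at origin 1 << 3 = 8.
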
    @ForceInline
    private static boolean partInRange(int resSizeLog2, int phySizeLog2, int part) {
        // Let's try a branch-free version of this.
        int diff = (resSizeLog2 - phySizeLog2);
        int sign = (diff >> -1);
        //d = Math.abs(diff);
        //d = (sign == 0 ? diff : sign == -1 ? 1 + ~diff);
        int d = (diff ^ sign) - sign;
        assert(d == Math.abs(diff) && d <= 16); // let's not go crazy here
        //p = part * sign;
        int p = (part ^ sign) - sign;
        // z = sign == 0 ? 0<=part<(1<<d), == (part & (-1 << d)) == 0
        // z = sign == -1 ? 0<=-part<(1<<d), == (-part & (-1 << d)) == 0
        boolean z = (p & (-1 << d)) == 0;
        assert(z == partInRangeSlow(resSizeLog2, phySizeLog2, part)) : z;
        return z;
    }

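    // Notes added for illustration: since int shift counts are masked to
    // five bits, (diff >> -1) is just (diff >> 31), the sign of diff.
    // Example: resSizeLog2 = 8, phySizeLog2 = 7, part = 1 gives diff = 1,
    // sign = 0, d = 1, p = 1, and (1 & (-1 << 1)) == 0, so the part is in
    // range; part = 2 gives (2 & ~1) != 0 and is rejected.
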
    private static boolean partInRangeSlow(int resSizeLog2, int phySizeLog2, int part) {
        if (resSizeLog2 > phySizeLog2) { // expansion
            int limit = 1 << (resSizeLog2 - phySizeLog2);
            return part >= 0 && part < limit;
        } else if (resSizeLog2 < phySizeLog2) { // contraction
            int limit = 1 << (phySizeLog2 - resSizeLog2);
            return part > -limit && part <= 0;
        } else {
            return (part == 0);
        }
    }

    private static
    ArrayIndexOutOfBoundsException
    wrongPart(AbstractSpecies<?> dsp,
              AbstractSpecies<?> rsp,
              boolean lanewise,
              int part) {
        String laneChange = "";
        String converting = "converting";
        int dsize = dsp.elementSize(), rsize = rsp.elementSize();
        if (!lanewise) {
            converting = "reinterpreting";
        } else if (dsize < rsize) {
            laneChange = String.format(" (lanes are expanding by %d)",
                                       rsize / dsize);
        } else if (dsize > rsize) {
            laneChange = String.format(" (lanes are contracting by %d)",
                                       dsize / rsize);
        }
        String msg = String.format("bad part number %d %s %s -> %s%s",
                                   part, converting, dsp, rsp, laneChange);
        return new ArrayIndexOutOfBoundsException(msg);
    }

    /*package-private*/
    ArithmeticException divZeroException() {
        throw new ArithmeticException("zero vector lane in dividend "+this);
    }

    /**
     * Helper function for all sorts of byte-wise reinterpretation casts.
     * This function kicks in after intrinsic failure.
     */
    /*package-private*/
    @ForceInline
    final <F>
    AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
        int blen = Math.max(this.bitSize(), rsp.vectorBitSize()) / Byte.SIZE;
        ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
        ByteBuffer bb = ByteBuffer.allocate(blen);
        this.intoByteBuffer(bb, 0, bo);
        VectorMask<F> m = rsp.maskAll(true);
        // enum-switches don't optimize properly JDK-8161245
        switch (rsp.laneType.switchKey) {
        case LaneType.SK_BYTE:
            return ByteVector.fromByteBuffer(rsp.check(byte.class), bb, 0, bo, m.check(byte.class)).check0(rsp);
        case LaneType.SK_SHORT:
            return ShortVector.fromByteBuffer(rsp.check(short.class), bb, 0, bo, m.check(short.class)).check0(rsp);
        case LaneType.SK_INT:
            return IntVector.fromByteBuffer(rsp.check(int.class), bb, 0, bo, m.check(int.class)).check0(rsp);
        case LaneType.SK_LONG:
            return LongVector.fromByteBuffer(rsp.check(long.class), bb, 0, bo, m.check(long.class)).check0(rsp);
        case LaneType.SK_FLOAT:
            return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp);
        case LaneType.SK_DOUBLE:
            return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp);
        default:
            throw new AssertionError(rsp.toString());
        }
    }

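    // Illustration (added; an assumption about the observable effect):
    // because the buffer order is fixed at LITTLE_ENDIAN, reinterpreting
    // an int lane holding 0x01020304 as bytes yields the lane sequence
    // {0x04, 0x03, 0x02, 0x01}, i.e. least significant byte first.
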
    /**
     * Helper function for all sorts of lane-wise conversions.
     * This function kicks in after intrinsic failure.
     */
    /*package-private*/
    @ForceInline
    final <F>
    AbstractVector<F> defaultCast(AbstractSpecies<F> dsp) {
        int rlength = dsp.laneCount;
        if (vspecies().laneType.elementKind == 'F') {
            // Buffer input values in a double array.
            double[] lanes = toDoubleArray();
            int limit = Math.min(lanes.length, rlength);
            // enum-switches don't optimize properly JDK-8161245
            switch (dsp.laneType.switchKey) {
            case LaneType.SK_BYTE: {
                byte[] a = new byte[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (byte) lanes[i];
                }
                return ByteVector.fromArray(dsp.check(byte.class), a, 0).check0(dsp);
            }
            case LaneType.SK_SHORT: {
                short[] a = new short[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (short) lanes[i];
                }
                return ShortVector.fromArray(dsp.check(short.class), a, 0).check0(dsp);
            }
            case LaneType.SK_INT: {
                int[] a = new int[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (int) lanes[i];
                }
                return IntVector.fromArray(dsp.check(int.class), a, 0).check0(dsp);
            }
            case LaneType.SK_LONG: {
                long[] a = new long[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (long) lanes[i];
                }
                return LongVector.fromArray(dsp.check(long.class), a, 0).check0(dsp);
            }
            case LaneType.SK_FLOAT: {
                float[] a = new float[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (float) lanes[i];
                }
                return FloatVector.fromArray(dsp.check(float.class), a, 0).check0(dsp);
            }
            case LaneType.SK_DOUBLE: {
                double[] a = new double[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (double) lanes[i];
                }
                return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
            }
            default: break;
            }
        } else {
            // Buffer input values in a long array.
            long[] lanes = toLongArray();
            int limit = Math.min(lanes.length, rlength);
            // enum-switches don't optimize properly JDK-8161245
            switch (dsp.laneType.switchKey) {
            case LaneType.SK_BYTE: {
                byte[] a = new byte[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (byte) lanes[i];
                }
                return ByteVector.fromArray(dsp.check(byte.class), a, 0).check0(dsp);
            }
            case LaneType.SK_SHORT: {
                short[] a = new short[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (short) lanes[i];
                }
                return ShortVector.fromArray(dsp.check(short.class), a, 0).check0(dsp);
            }
            case LaneType.SK_INT: {
                int[] a = new int[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (int) lanes[i];
                }
                return IntVector.fromArray(dsp.check(int.class), a, 0).check0(dsp);
            }
            case LaneType.SK_LONG: {
                long[] a = new long[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (long) lanes[i];
                }
                return LongVector.fromArray(dsp.check(long.class), a, 0).check0(dsp);
            }
            case LaneType.SK_FLOAT: {
                float[] a = new float[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (float) lanes[i];
                }
                return FloatVector.fromArray(dsp.check(float.class), a, 0).check0(dsp);
            }
            case LaneType.SK_DOUBLE: {
                double[] a = new double[rlength];
                for (int i = 0; i < limit; i++) {
                    a[i] = (double) lanes[i];
                }
                return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
            }
            default: break;
            }
        }
        throw new AssertionError();
    }

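    // Illustration (added): defaultCast applies Java scalar cast semantics
    // lane by lane, so casting a double lane holding 260.7 to a byte lane
    // yields (byte) 260.7 == 4, and when the target species has more lanes
    // than the source, the extra lanes default to zero.
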
    // Constant-folded access to conversion intrinsics:

    /**
     * Dispatch on conversion kind and target species.
     * The code of this is arranged to fold up if the
     * vector class is constant and the target species
     * is also constant. This is often the case.
     * Residual non-folded code may also perform acceptably
     * in some cases due to type profiling, especially
     * of rvtype. If only one shape is being used,
     * the profiling of rvtype should help speculatively
     * fold the code even when the target species is
     * not a constant.
     */
    /*package-private*/
    @ForceInline
    final <F>
    AbstractVector<F> convert0(char kind, AbstractSpecies<F> rsp) {
        // Derive some JIT-time constants:
        Class<?> etype; // fill in after switch (constant)
        int vlength; // fill in after switch (mark type profile?)
        Class<?> rvtype; // fill in after switch (mark type profile)
        Class<?> rtype;
        int rlength;
        switch (kind) {
        case 'Z': // lane-wise size change, maybe with sign clip
            // Maybe this should be an intrinsic also.
            AbstractSpecies<?> rspi = rsp.asIntegral();
            AbstractVector<?> bitv = resizeLanes0(this, rspi);
            return (rspi == rsp ? bitv.check0(rsp) : bitv.convert0('X', rsp));
        case 'C': // lane-wise cast (but not identity)
            rtype = rsp.elementType();
            rlength = rsp.laneCount();
            etype = this.elementType(); // (profile)
            vlength = this.length(); // (profile)
            rvtype = rsp.dummyVector().getClass(); // (profile)
            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                    this.getClass(), etype, vlength,
                    rvtype, rtype, rlength,
                    this, rsp,
                    AbstractVector::defaultCast);
        case 'X': // reinterpret cast, not lane-wise if lane sizes differ
            rtype = rsp.elementType();
            rlength = rsp.laneCount();
            etype = this.elementType(); // (profile)
            vlength = this.length(); // (profile)
            rvtype = rsp.dummyVector().getClass(); // (profile)
            return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET,
                    this.getClass(), etype, vlength,
                    rvtype, rtype, rlength,
                    this, rsp,
                    AbstractVector::defaultReinterpret);
        }
        throw new AssertionError();
    }

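    // Illustration (added; a sketch of the intended JIT behavior): at a
    // call site where the receiver's concrete vector class and the target
    // species are both constants, `kind`, `etype`, `vlength`, `rvtype` and
    // `rlength` fold to constants as well, so the switch above collapses
    // into a single VectorSupport.convert intrinsic; the trailing lambda
    // is only reached if the intrinsic fails.
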
    @ForceInline
    private static <F>
    AbstractVector<F>
    resizeLanes0(AbstractVector<?> v, AbstractSpecies<F> rspi) {
        AbstractSpecies<?> dsp = v.vspecies();
        int sizeChange = rspi.elementSize() - dsp.elementSize();
        AbstractSpecies<?> dspi = dsp.asIntegral();
        if (dspi != dsp) v = v.convert0('R', dspi);
        if (sizeChange <= 0) { // clip in place
            return v.convert0('C', rspi);
        }
        // extend in place, but remove unwanted sign extension
        long mask = -1L >>> sizeChange;
        return (AbstractVector<F>)
            v.convert0('C', rspi)
            .lanewise(AND, rspi.broadcast(mask));
    }

    // Byte buffer wrappers.
    static ByteBuffer wrapper(ByteBuffer bb, ByteOrder bo) {
        return bb.duplicate().order(bo);
    }

    static ByteBuffer wrapper(byte[] a, ByteOrder bo) {
        return ByteBuffer.wrap(a).order(bo);
    }

    static {
        // Recode uses of VectorSupport.reinterpret if this assertion fails:
        assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
    }
}
@ -0,0 +1,840 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast") // warning: redundant cast
final class Byte128Vector extends ByteVector {
    static final ByteSpecies VSPECIES =
        (ByteSpecies) ByteVector.SPECIES_128;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Byte128Vector> VCLASS = Byte128Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Byte> ETYPE = byte.class; // used by the JVM

    Byte128Vector(byte[] v) {
        super(v);
    }

    // For compatibility as Byte128Vector::new,
    // stored into species.vectorFactory.
    Byte128Vector(Object v) {
        this((byte[]) v);
    }

    static final Byte128Vector ZERO = new Byte128Vector(new byte[VLENGTH]);
    static final Byte128Vector IOTA = new Byte128Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public ByteSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Byte> elementType() { return byte.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Byte.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    byte[] vec() {
        return (byte[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Byte128Vector broadcast(byte e) {
        return (Byte128Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    public final Byte128Vector broadcast(long e) {
        return (Byte128Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    Byte128Mask maskFromArray(boolean[] bits) {
        return new Byte128Mask(bits);
    }

    @Override
    @ForceInline
    Byte128Shuffle iotaShuffle() { return Byte128Shuffle.IOTA; }

    @ForceInline
    Byte128Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

    @Override
    @ForceInline
    Byte128Shuffle shuffleFromBytes(byte[] reorder) { return new Byte128Shuffle(reorder); }

    @Override
    @ForceInline
    Byte128Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte128Shuffle(indexes, i); }

    @Override
    @ForceInline
    Byte128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte128Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Byte128Vector vectorFactory(byte[] vec) {
        return new Byte128Vector(vec);
    }

    @ForceInline
    final @Override
    Byte128Vector asByteVectorRaw() {
        return (Byte128Vector) super.asByteVectorRawTemplate(); // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType); // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Byte128Vector uOp(FUnOp f) {
        return (Byte128Vector) super.uOpTemplate(f); // specialize
    }

    @ForceInline
    final @Override
    Byte128Vector uOp(VectorMask<Byte> m, FUnOp f) {
        return (Byte128Vector)
            super.uOpTemplate((Byte128Mask)m, f); // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Byte128Vector bOp(Vector<Byte> v, FBinOp f) {
        return (Byte128Vector) super.bOpTemplate((Byte128Vector)v, f); // specialize
    }

    @ForceInline
    final @Override
    Byte128Vector bOp(Vector<Byte> v,
                      VectorMask<Byte> m, FBinOp f) {
        return (Byte128Vector)
            super.bOpTemplate((Byte128Vector)v, (Byte128Mask)m,
                              f); // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Byte128Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
        return (Byte128Vector)
            super.tOpTemplate((Byte128Vector)v1, (Byte128Vector)v2,
                              f); // specialize
    }

    @ForceInline
    final @Override
    Byte128Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
                      VectorMask<Byte> m, FTriOp f) {
        return (Byte128Vector)
            super.tOpTemplate((Byte128Vector)v1, (Byte128Vector)v2,
                              (Byte128Mask)m, f); // specialize
    }

    @ForceInline
    final @Override
    byte rOp(byte v, FBinOp f) {
        return super.rOpTemplate(v, f); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Byte128Vector lanewise(Unary op) {
        return (Byte128Vector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector lanewise(Binary op, Vector<Byte> v) {
        return (Byte128Vector) super.lanewiseTemplate(op, v); // specialize
    }

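    // Illustration (added; a sketch, not part of the original source):
    // after inlining, a call such as va.add(vb) on two Byte128Vectors
    // becomes va.lanewise(VectorOperators.ADD, vb), which lands on the
    // specialized override above with a constant opcode and constant
    // Byte128Vector metadata for the JIT intrinsic.
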
    /*package-private*/
    @Override
    @ForceInline Byte128Vector
    lanewiseShift(VectorOperators.Binary op, int e) {
        return (Byte128Vector) super.lanewiseShiftTemplate(op, e); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline
    public final
    Byte128Vector
    lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
        return (Byte128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    Byte128Vector addIndex(int scale) {
        return (Byte128Vector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op,
                                  VectorMask<Byte> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Byte> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Byte> toShuffle() {
        byte[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Byte128Mask test(Test op) {
        return super.testTemplate(Byte128Mask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Byte128Mask compare(Comparison op, Vector<Byte> v) {
        return super.compareTemplate(Byte128Mask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final Byte128Mask compare(Comparison op, byte s) {
        return super.compareTemplate(Byte128Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final Byte128Mask compare(Comparison op, long s) {
        return super.compareTemplate(Byte128Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
        return (Byte128Vector)
            super.blendTemplate(Byte128Mask.class,
                                (Byte128Vector) v,
                                (Byte128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector slice(int origin, Vector<Byte> v) {
        return (Byte128Vector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte128Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

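    // Worked example (added for illustration): with VLENGTH = 16,
    // slice(3) rotates the lanes left by 3 via the wrapped iota shuffle
    // and blends with ZERO on lanes 0..12, producing lanes 3..15 of this
    // vector followed by three zero lanes.
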
    @Override
    @ForceInline
    public Byte128Vector unslice(int origin, Vector<Byte> w, int part) {
        return (Byte128Vector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
        return (Byte128Vector)
            super.unsliceTemplate(Byte128Mask.class,
                                  origin, w, part,
                                  (Byte128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte128Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte128Vector rearrange(VectorShuffle<Byte> s) {
        return (Byte128Vector)
            super.rearrangeTemplate(Byte128Shuffle.class,
                                    (Byte128Shuffle) s); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector rearrange(VectorShuffle<Byte> shuffle,
                                   VectorMask<Byte> m) {
        return (Byte128Vector)
            super.rearrangeTemplate(Byte128Shuffle.class,
                                    (Byte128Shuffle) shuffle,
                                    (Byte128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector rearrange(VectorShuffle<Byte> s,
                                   Vector<Byte> v) {
        return (Byte128Vector)
            super.rearrangeTemplate(Byte128Shuffle.class,
                                    (Byte128Shuffle) s,
                                    (Byte128Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector selectFrom(Vector<Byte> v) {
        return (Byte128Vector)
            super.selectFromTemplate((Byte128Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte128Vector selectFrom(Vector<Byte> v,
                                    VectorMask<Byte> m) {
        return (Byte128Vector)
            super.selectFromTemplate((Byte128Vector) v,
                                     (Byte128Mask) m); // specialize
    }


    @ForceInline
    @Override
    public byte lane(int i) {
        switch(i) {
            case 0: return laneHelper(0);
            case 1: return laneHelper(1);
            case 2: return laneHelper(2);
            case 3: return laneHelper(3);
            case 4: return laneHelper(4);
            case 5: return laneHelper(5);
            case 6: return laneHelper(6);
            case 7: return laneHelper(7);
            case 8: return laneHelper(8);
            case 9: return laneHelper(9);
            case 10: return laneHelper(10);
            case 11: return laneHelper(11);
            case 12: return laneHelper(12);
            case 13: return laneHelper(13);
            case 14: return laneHelper(14);
            case 15: return laneHelper(15);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public byte laneHelper(int i) {
        return (byte) VectorSupport.extract(
                      VCLASS, ETYPE, VLENGTH,
                      this, i,
                      (vec, ix) -> {
                          byte[] vecarr = vec.vec();
                          return (long)vecarr[ix];
                      });
    }

    @ForceInline
    @Override
    public Byte128Vector withLane(int i, byte e) {
        switch (i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            case 4: return withLaneHelper(4, e);
            case 5: return withLaneHelper(5, e);
            case 6: return withLaneHelper(6, e);
            case 7: return withLaneHelper(7, e);
            case 8: return withLaneHelper(8, e);
            case 9: return withLaneHelper(9, e);
            case 10: return withLaneHelper(10, e);
            case 11: return withLaneHelper(11, e);
            case 12: return withLaneHelper(12, e);
            case 13: return withLaneHelper(13, e);
            case 14: return withLaneHelper(14, e);
            case 15: return withLaneHelper(15, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Byte128Vector withLaneHelper(int i, byte e) {
        return VectorSupport.insert(
                             VCLASS, ETYPE, VLENGTH,
                             this, i, (long)e,
                             (v, ix, bits) -> {
                                 byte[] res = v.vec().clone();
                                 res[ix] = (byte)bits;
                                 return v.vectorFactory(res);
                             });
    }

    // Mask

    static final class Byte128Mask extends AbstractMask<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte128Mask(boolean[] bits) {
            this(bits, 0);
        }

        Byte128Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Byte128Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public ByteSpecies vspecies() {
            // ISSUE: This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Byte128Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Byte128Mask(res);
        }

        @Override
        Byte128Mask bOp(VectorMask<Byte> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Byte128Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Byte128Mask(res);
        }

        @ForceInline
        @Override
        public final
        Byte128Vector toVector() {
            return (Byte128Vector) super.toVectorTemplate(); // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte128Vector.Byte128Mask(maskArray).check(species);
            case LaneType.SK_SHORT:
                return new Short128Vector.Short128Mask(maskArray).check(species);
            case LaneType.SK_INT:
                return new Int128Vector.Int128Mask(maskArray).check(species);
            case LaneType.SK_LONG:
                return new Long128Vector.Long128Mask(maskArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float128Vector.Float128Mask(maskArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double128Vector.Double128Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Byte128Mask not() {
            return xor(maskAll(true));
        }
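        // Note added for illustration: there is no direct intrinsic for
        // not() here, so it is expressed as xor(maskAll(true)); lane-wise,
        // !a == (a ^ true).
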
        // Binary operations

        @Override
        @ForceInline
        public Byte128Mask and(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte128Mask m = (Byte128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Byte128Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Byte128Mask or(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte128Mask m = (Byte128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Byte128Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Byte128Mask xor(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte128Mask m = (Byte128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte128Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Byte128Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Byte128Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Byte128Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Byte128Mask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static Byte128Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Byte128Mask.class, byte.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Byte128Mask TRUE_MASK = new Byte128Mask(true);
        private static final Byte128Mask FALSE_MASK = new Byte128Mask(false);

    }

    // Shuffle

    static final class Byte128Shuffle extends AbstractShuffle<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte128Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte128Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte128Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Byte128Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public ByteSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Byte128Shuffle IOTA = new Byte128Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Byte128Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte128Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Byte128Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte128Vector.Byte128Shuffle(shuffleArray).check(species);
            case LaneType.SK_SHORT:
                return new Short128Vector.Short128Shuffle(shuffleArray).check(species);
            case LaneType.SK_INT:
                return new Int128Vector.Int128Shuffle(shuffleArray).check(species);
            case LaneType.SK_LONG:
                return new Long128Vector.Long128Shuffle(shuffleArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float128Vector.Float128Shuffle(shuffleArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double128Vector.Double128Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Byte128Shuffle rearrange(VectorShuffle<Byte> shuffle) {
            Byte128Shuffle s = (Byte128Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi]; // throws on exceptional index
            }
            return new Byte128Shuffle(r);
        }
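        // Worked example (added for illustration): rearrange composes
        // shuffles as r[i] = this[shuffle[i]]; e.g. if this = {2,3,...}
        // and shuffle = {1,0,...}, the result starts {3,2,...}.
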
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    ByteVector fromArray0(byte[] a, int offset) {
        return super.fromArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(byte[] a, int offset) {
        super.intoArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset); // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
@ -0,0 +1,872 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast") // warning: redundant cast
final class Byte256Vector extends ByteVector {
    static final ByteSpecies VSPECIES =
        (ByteSpecies) ByteVector.SPECIES_256;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Byte256Vector> VCLASS = Byte256Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Byte> ETYPE = byte.class; // used by the JVM

    Byte256Vector(byte[] v) {
        super(v);
    }

    // For compatibility as Byte256Vector::new,
    // stored into species.vectorFactory.
    Byte256Vector(Object v) {
        this((byte[]) v);
    }

    static final Byte256Vector ZERO = new Byte256Vector(new byte[VLENGTH]);
    static final Byte256Vector IOTA = new Byte256Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public ByteSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Byte> elementType() { return byte.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Byte.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    byte[] vec() {
        return (byte[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Byte256Vector broadcast(byte e) {
        return (Byte256Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    public final Byte256Vector broadcast(long e) {
        return (Byte256Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    Byte256Mask maskFromArray(boolean[] bits) {
        return new Byte256Mask(bits);
    }

    @Override
    @ForceInline
    Byte256Shuffle iotaShuffle() { return Byte256Shuffle.IOTA; }

    @ForceInline
    Byte256Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

    @Override
    @ForceInline
    Byte256Shuffle shuffleFromBytes(byte[] reorder) { return new Byte256Shuffle(reorder); }

    @Override
    @ForceInline
    Byte256Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte256Shuffle(indexes, i); }

    @Override
    @ForceInline
    Byte256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte256Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Byte256Vector vectorFactory(byte[] vec) {
        return new Byte256Vector(vec);
    }

    @ForceInline
    final @Override
    Byte256Vector asByteVectorRaw() {
        return (Byte256Vector) super.asByteVectorRawTemplate(); // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType); // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Byte256Vector uOp(FUnOp f) {
        return (Byte256Vector) super.uOpTemplate(f); // specialize
    }

    @ForceInline
    final @Override
    Byte256Vector uOp(VectorMask<Byte> m, FUnOp f) {
        return (Byte256Vector)
            super.uOpTemplate((Byte256Mask)m, f); // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Byte256Vector bOp(Vector<Byte> v, FBinOp f) {
        return (Byte256Vector) super.bOpTemplate((Byte256Vector)v, f); // specialize
    }

    @ForceInline
    final @Override
    Byte256Vector bOp(Vector<Byte> v,
                      VectorMask<Byte> m, FBinOp f) {
        return (Byte256Vector)
            super.bOpTemplate((Byte256Vector)v, (Byte256Mask)m,
                              f); // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Byte256Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
        return (Byte256Vector)
            super.tOpTemplate((Byte256Vector)v1, (Byte256Vector)v2,
                              f); // specialize
    }

    @ForceInline
    final @Override
    Byte256Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
                      VectorMask<Byte> m, FTriOp f) {
        return (Byte256Vector)
            super.tOpTemplate((Byte256Vector)v1, (Byte256Vector)v2,
                              (Byte256Mask)m, f); // specialize
    }

    @ForceInline
    final @Override
    byte rOp(byte v, FBinOp f) {
        return super.rOpTemplate(v, f); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Byte256Vector lanewise(Unary op) {
        return (Byte256Vector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector lanewise(Binary op, Vector<Byte> v) {
        return (Byte256Vector) super.lanewiseTemplate(op, v); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline Byte256Vector
    lanewiseShift(VectorOperators.Binary op, int e) {
        return (Byte256Vector) super.lanewiseShiftTemplate(op, e); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline
    public final
    Byte256Vector
    lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
        return (Byte256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    Byte256Vector addIndex(int scale) {
        return (Byte256Vector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op,
                                  VectorMask<Byte> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Byte> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Byte> toShuffle() {
        byte[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Byte256Mask test(Test op) {
        return super.testTemplate(Byte256Mask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Byte256Mask compare(Comparison op, Vector<Byte> v) {
        return super.compareTemplate(Byte256Mask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final Byte256Mask compare(Comparison op, byte s) {
        return super.compareTemplate(Byte256Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final Byte256Mask compare(Comparison op, long s) {
        return super.compareTemplate(Byte256Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
        return (Byte256Vector)
            super.blendTemplate(Byte256Mask.class,
                                (Byte256Vector) v,
                                (Byte256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector slice(int origin, Vector<Byte> v) {
        return (Byte256Vector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte256Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte256Vector unslice(int origin, Vector<Byte> w, int part) {
        return (Byte256Vector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
        return (Byte256Vector)
            super.unsliceTemplate(Byte256Mask.class,
                                  origin, w, part,
                                  (Byte256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte256Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte256Vector rearrange(VectorShuffle<Byte> s) {
        return (Byte256Vector)
            super.rearrangeTemplate(Byte256Shuffle.class,
                                    (Byte256Shuffle) s); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector rearrange(VectorShuffle<Byte> shuffle,
                                   VectorMask<Byte> m) {
        return (Byte256Vector)
            super.rearrangeTemplate(Byte256Shuffle.class,
                                    (Byte256Shuffle) shuffle,
                                    (Byte256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector rearrange(VectorShuffle<Byte> s,
                                   Vector<Byte> v) {
        return (Byte256Vector)
            super.rearrangeTemplate(Byte256Shuffle.class,
                                    (Byte256Shuffle) s,
                                    (Byte256Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector selectFrom(Vector<Byte> v) {
        return (Byte256Vector)
            super.selectFromTemplate((Byte256Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte256Vector selectFrom(Vector<Byte> v,
                                    VectorMask<Byte> m) {
        return (Byte256Vector)
            super.selectFromTemplate((Byte256Vector) v,
                                     (Byte256Mask) m); // specialize
    }


    @ForceInline
    @Override
    public byte lane(int i) {
        switch(i) {
            case 0: return laneHelper(0);
            case 1: return laneHelper(1);
            case 2: return laneHelper(2);
            case 3: return laneHelper(3);
            case 4: return laneHelper(4);
            case 5: return laneHelper(5);
            case 6: return laneHelper(6);
            case 7: return laneHelper(7);
            case 8: return laneHelper(8);
            case 9: return laneHelper(9);
            case 10: return laneHelper(10);
            case 11: return laneHelper(11);
            case 12: return laneHelper(12);
            case 13: return laneHelper(13);
            case 14: return laneHelper(14);
            case 15: return laneHelper(15);
            case 16: return laneHelper(16);
            case 17: return laneHelper(17);
            case 18: return laneHelper(18);
            case 19: return laneHelper(19);
            case 20: return laneHelper(20);
            case 21: return laneHelper(21);
            case 22: return laneHelper(22);
            case 23: return laneHelper(23);
            case 24: return laneHelper(24);
            case 25: return laneHelper(25);
            case 26: return laneHelper(26);
            case 27: return laneHelper(27);
            case 28: return laneHelper(28);
            case 29: return laneHelper(29);
            case 30: return laneHelper(30);
            case 31: return laneHelper(31);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public byte laneHelper(int i) {
        return (byte) VectorSupport.extract(
                      VCLASS, ETYPE, VLENGTH,
                      this, i,
                      (vec, ix) -> {
                          byte[] vecarr = vec.vec();
                          return (long)vecarr[ix];
                      });
    }

    @ForceInline
    @Override
    public Byte256Vector withLane(int i, byte e) {
        switch (i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            case 4: return withLaneHelper(4, e);
            case 5: return withLaneHelper(5, e);
            case 6: return withLaneHelper(6, e);
            case 7: return withLaneHelper(7, e);
            case 8: return withLaneHelper(8, e);
            case 9: return withLaneHelper(9, e);
            case 10: return withLaneHelper(10, e);
            case 11: return withLaneHelper(11, e);
            case 12: return withLaneHelper(12, e);
            case 13: return withLaneHelper(13, e);
            case 14: return withLaneHelper(14, e);
            case 15: return withLaneHelper(15, e);
            case 16: return withLaneHelper(16, e);
            case 17: return withLaneHelper(17, e);
            case 18: return withLaneHelper(18, e);
            case 19: return withLaneHelper(19, e);
            case 20: return withLaneHelper(20, e);
            case 21: return withLaneHelper(21, e);
            case 22: return withLaneHelper(22, e);
            case 23: return withLaneHelper(23, e);
            case 24: return withLaneHelper(24, e);
            case 25: return withLaneHelper(25, e);
            case 26: return withLaneHelper(26, e);
            case 27: return withLaneHelper(27, e);
            case 28: return withLaneHelper(28, e);
            case 29: return withLaneHelper(29, e);
            case 30: return withLaneHelper(30, e);
            case 31: return withLaneHelper(31, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Byte256Vector withLaneHelper(int i, byte e) {
        return VectorSupport.insert(
                             VCLASS, ETYPE, VLENGTH,
                             this, i, (long)e,
                             (v, ix, bits) -> {
                                 byte[] res = v.vec().clone();
                                 res[ix] = (byte)bits;
                                 return v.vectorFactory(res);
                             });
    }

    // Mask

    static final class Byte256Mask extends AbstractMask<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte256Mask(boolean[] bits) {
            this(bits, 0);
        }

        Byte256Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Byte256Mask(boolean val) {
|
||||
super(prepare(val));
|
||||
}
|
||||
|
||||
private static boolean[] prepare(boolean[] bits, int offset) {
|
||||
boolean[] newBits = new boolean[VSPECIES.laneCount()];
|
||||
for (int i = 0; i < newBits.length; i++) {
|
||||
newBits[i] = bits[offset + i];
|
||||
}
|
||||
return newBits;
|
||||
}
|
||||
|
||||
private static boolean[] prepare(boolean val) {
|
||||
boolean[] bits = new boolean[VSPECIES.laneCount()];
|
||||
Arrays.fill(bits, val);
|
||||
return bits;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
public ByteSpecies vspecies() {
|
||||
// ISSUE: This should probably be a @Stable
|
||||
// field inside AbstractMask, rather than
|
||||
// a megamorphic method.
|
||||
return VSPECIES;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
boolean[] getBits() {
|
||||
return (boolean[])getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
Byte256Mask uOp(MUnOp f) {
|
||||
boolean[] res = new boolean[vspecies().laneCount()];
|
||||
boolean[] bits = getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(i, bits[i]);
|
||||
}
|
||||
return new Byte256Mask(res);
|
||||
}
|
||||
|
||||
@Override
|
||||
Byte256Mask bOp(VectorMask<Byte> m, MBinOp f) {
|
||||
boolean[] res = new boolean[vspecies().laneCount()];
|
||||
boolean[] bits = getBits();
|
||||
boolean[] mbits = ((Byte256Mask)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(i, bits[i], mbits[i]);
|
||||
}
|
||||
return new Byte256Mask(res);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final
|
||||
Byte256Vector toVector() {
|
||||
return (Byte256Vector) super.toVectorTemplate(); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
|
||||
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
|
||||
if (length() != species.laneCount())
|
||||
throw new IllegalArgumentException("VectorMask length and species length differ");
|
||||
boolean[] maskArray = toArray();
|
||||
// enum-switches don't optimize properly JDK-8161245
|
||||
switch (species.laneType.switchKey) {
|
||||
case LaneType.SK_BYTE:
|
||||
return new Byte256Vector.Byte256Mask(maskArray).check(species);
|
||||
case LaneType.SK_SHORT:
|
||||
return new Short256Vector.Short256Mask(maskArray).check(species);
|
||||
case LaneType.SK_INT:
|
||||
return new Int256Vector.Int256Mask(maskArray).check(species);
|
||||
case LaneType.SK_LONG:
|
||||
return new Long256Vector.Long256Mask(maskArray).check(species);
|
||||
case LaneType.SK_FLOAT:
|
||||
return new Float256Vector.Float256Mask(maskArray).check(species);
|
||||
case LaneType.SK_DOUBLE:
|
||||
return new Double256Vector.Double256Mask(maskArray).check(species);
|
||||
}
|
||||
|
||||
// Should not reach here.
|
||||
throw new AssertionError(species);
|
||||
}
|
||||
|
||||
// Unary operations
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Mask not() {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Mask and(VectorMask<Byte> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Byte256Mask m = (Byte256Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_AND, Byte256Mask.class, byte.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Mask or(VectorMask<Byte> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Byte256Mask m = (Byte256Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_OR, Byte256Mask.class, byte.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
/* package-private */
|
||||
Byte256Mask xor(VectorMask<Byte> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Byte256Mask m = (Byte256Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte256Mask.class, byte.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
|
||||
}
|
||||
|
||||
// Reductions
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public boolean anyTrue() {
|
||||
return VectorSupport.test(BT_ne, Byte256Mask.class, byte.class, VLENGTH,
|
||||
this, vspecies().maskAll(true),
|
||||
(m, __) -> anyTrueHelper(((Byte256Mask)m).getBits()));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public boolean allTrue() {
|
||||
return VectorSupport.test(BT_overflow, Byte256Mask.class, byte.class, VLENGTH,
|
||||
this, vspecies().maskAll(true),
|
||||
(m, __) -> allTrueHelper(((Byte256Mask)m).getBits()));
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
/*package-private*/
|
||||
static Byte256Mask maskAll(boolean bit) {
|
||||
return VectorSupport.broadcastCoerced(Byte256Mask.class, byte.class, VLENGTH,
|
||||
(bit ? -1 : 0), null,
|
||||
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
|
||||
}
|
||||
private static final Byte256Mask TRUE_MASK = new Byte256Mask(true);
|
||||
private static final Byte256Mask FALSE_MASK = new Byte256Mask(false);
|
||||
|
||||
}
|
||||
|
||||
// Shuffle
|
||||
|
||||
static final class Byte256Shuffle extends AbstractShuffle<Byte> {
|
||||
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
|
||||
static final Class<Byte> ETYPE = byte.class; // used by the JVM
|
||||
|
||||
Byte256Shuffle(byte[] reorder) {
|
||||
super(VLENGTH, reorder);
|
||||
}
|
||||
|
||||
public Byte256Shuffle(int[] reorder) {
|
||||
super(VLENGTH, reorder);
|
||||
}
|
||||
|
||||
public Byte256Shuffle(int[] reorder, int i) {
|
||||
super(VLENGTH, reorder, i);
|
||||
}
|
||||
|
||||
public Byte256Shuffle(IntUnaryOperator fn) {
|
||||
super(VLENGTH, fn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteSpecies vspecies() {
|
||||
return VSPECIES;
|
||||
}
|
||||
|
||||
static {
|
||||
// There must be enough bits in the shuffle lanes to encode
|
||||
// VLENGTH valid indexes and VLENGTH exceptional ones.
|
||||
assert(VLENGTH < Byte.MAX_VALUE);
|
||||
assert(Byte.MIN_VALUE <= -VLENGTH);
|
||||
}
|
||||
static final Byte256Shuffle IOTA = new Byte256Shuffle(IDENTITY);
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Byte256Vector toVector() {
|
||||
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte256Shuffle.class, this, VLENGTH,
|
||||
(s) -> ((Byte256Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
|
||||
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
|
||||
if (length() != species.laneCount())
|
||||
throw new IllegalArgumentException("VectorShuffle length and species length differ");
|
||||
int[] shuffleArray = toArray();
|
||||
// enum-switches don't optimize properly JDK-8161245
|
||||
switch (species.laneType.switchKey) {
|
||||
case LaneType.SK_BYTE:
|
||||
return new Byte256Vector.Byte256Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_SHORT:
|
||||
return new Short256Vector.Short256Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_INT:
|
||||
return new Int256Vector.Int256Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_LONG:
|
||||
return new Long256Vector.Long256Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_FLOAT:
|
||||
return new Float256Vector.Float256Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_DOUBLE:
|
||||
return new Double256Vector.Double256Shuffle(shuffleArray).check(species);
|
||||
}
|
||||
|
||||
// Should not reach here.
|
||||
throw new AssertionError(species);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public Byte256Shuffle rearrange(VectorShuffle<Byte> shuffle) {
|
||||
Byte256Shuffle s = (Byte256Shuffle) shuffle;
|
||||
byte[] reorder1 = reorder();
|
||||
byte[] reorder2 = s.reorder();
|
||||
byte[] r = new byte[reorder1.length];
|
||||
for (int i = 0; i < reorder1.length; i++) {
|
||||
int ssi = reorder2[i];
|
||||
r[i] = reorder1[ssi]; // throws on exceptional index
|
||||
}
|
||||
return new Byte256Shuffle(r);
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
|
||||
// Specialized low-level memory operations.
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromArray0(byte[] a, int offset) {
|
||||
return super.fromArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoArray0(byte[] a, int offset) {
|
||||
super.intoArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
// End of specialized low-level memory operations.
|
||||
|
||||
// ================================================
|
||||
|
||||
}
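
The generated slice and unslice bodies above all follow one pattern: build an iota shuffle, compare it against the origin to form a blend mask, then rearrange and blend with ZERO. A minimal user-level sketch of the same behavior (illustration only, not part of this commit; the class and variable names are hypothetical, and running it requires --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.ByteVector;
    import jdk.incubator.vector.VectorSpecies;

    public class SliceDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_256;

        public static void main(String[] args) {
            byte[] data = new byte[SPECIES.length()];
            for (int i = 0; i < data.length; i++) data[i] = (byte) i;
            ByteVector v = ByteVector.fromArray(SPECIES, data, 0);
            // slice(3) shifts lanes down by three and zero-fills the tail,
            // matching the iota/compare/blend sequence in the generated code.
            System.out.println(v.slice(3));  // lanes: 3, 4, ..., 31, 0, 0, 0
        }
    }
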
@ -0,0 +1,936 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast")  // warning: redundant cast
final class Byte512Vector extends ByteVector {
    static final ByteSpecies VSPECIES =
        (ByteSpecies) ByteVector.SPECIES_512;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Byte512Vector> VCLASS = Byte512Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Byte> ETYPE = byte.class; // used by the JVM

    Byte512Vector(byte[] v) {
        super(v);
    }

    // For compatibility as Byte512Vector::new,
    // stored into species.vectorFactory.
    Byte512Vector(Object v) {
        this((byte[]) v);
    }

    static final Byte512Vector ZERO = new Byte512Vector(new byte[VLENGTH]);
    static final Byte512Vector IOTA = new Byte512Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public ByteSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Byte> elementType() { return byte.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Byte.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    byte[] vec() {
        return (byte[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Byte512Vector broadcast(byte e) {
        return (Byte512Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    public final Byte512Vector broadcast(long e) {
        return (Byte512Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    Byte512Mask maskFromArray(boolean[] bits) {
        return new Byte512Mask(bits);
    }

    @Override
    @ForceInline
    Byte512Shuffle iotaShuffle() { return Byte512Shuffle.IOTA; }

    @ForceInline
    Byte512Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

    @Override
    @ForceInline
    Byte512Shuffle shuffleFromBytes(byte[] reorder) { return new Byte512Shuffle(reorder); }

    @Override
    @ForceInline
    Byte512Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte512Shuffle(indexes, i); }

    @Override
    @ForceInline
    Byte512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte512Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Byte512Vector vectorFactory(byte[] vec) {
        return new Byte512Vector(vec);
    }

    @ForceInline
    final @Override
    Byte512Vector asByteVectorRaw() {
        return (Byte512Vector) super.asByteVectorRawTemplate();  // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType);  // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Byte512Vector uOp(FUnOp f) {
        return (Byte512Vector) super.uOpTemplate(f);  // specialize
    }

    @ForceInline
    final @Override
    Byte512Vector uOp(VectorMask<Byte> m, FUnOp f) {
        return (Byte512Vector)
            super.uOpTemplate((Byte512Mask)m, f);  // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Byte512Vector bOp(Vector<Byte> v, FBinOp f) {
        return (Byte512Vector) super.bOpTemplate((Byte512Vector)v, f);  // specialize
    }

    @ForceInline
    final @Override
    Byte512Vector bOp(Vector<Byte> v,
                      VectorMask<Byte> m, FBinOp f) {
        return (Byte512Vector)
            super.bOpTemplate((Byte512Vector)v, (Byte512Mask)m,
                              f);  // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Byte512Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
        return (Byte512Vector)
            super.tOpTemplate((Byte512Vector)v1, (Byte512Vector)v2,
                              f);  // specialize
    }

    @ForceInline
    final @Override
    Byte512Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
                      VectorMask<Byte> m, FTriOp f) {
        return (Byte512Vector)
            super.tOpTemplate((Byte512Vector)v1, (Byte512Vector)v2,
                              (Byte512Mask)m, f);  // specialize
    }

    @ForceInline
    final @Override
    byte rOp(byte v, FBinOp f) {
        return super.rOpTemplate(v, f);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Byte512Vector lanewise(Unary op) {
        return (Byte512Vector) super.lanewiseTemplate(op);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector lanewise(Binary op, Vector<Byte> v) {
        return (Byte512Vector) super.lanewiseTemplate(op, v);  // specialize
    }
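
    // Illustration only, not part of the generated file: given the v-table
    // note above, a user-level call such as
    //
    //     ByteVector r = a.lanewise(VectorOperators.ADD, b);  // what a.add(b) inlines to
    //
    // binds to the override just above, so the JIT intrinsic sees the ADD
    // opcode together with this concrete class's metadata.  (The names a,
    // b, and r are hypothetical.)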

    /*package-private*/
    @Override
    @ForceInline Byte512Vector
    lanewiseShift(VectorOperators.Binary op, int e) {
        return (Byte512Vector) super.lanewiseShiftTemplate(op, e);  // specialize
    }

    /*package-private*/
    @Override
    @ForceInline
    public final
    Byte512Vector
    lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
        return (Byte512Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
    }

    @Override
    @ForceInline
    public final
    Byte512Vector addIndex(int scale) {
        return (Byte512Vector) super.addIndexTemplate(scale);  // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op,
                                  VectorMask<Byte> m) {
        return super.reduceLanesTemplate(op, m);  // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Byte> m) {
        return (long) super.reduceLanesTemplate(op, m);  // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Byte> toShuffle() {
        byte[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }
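
    // Illustration only, not part of the generated file: toShuffle() widens
    // the byte lanes into an int[] and rebuilds through
    // VectorShuffle.fromArray, so lane values are reinterpreted as shuffle
    // indexes.  For example (hypothetical values), a vector whose lanes run
    // 1, 0, 3, 2, 5, 4, ... becomes a shuffle that swaps adjacent lanes
    // when fed to rearrange.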

    // Specialized unary testing

    @Override
    @ForceInline
    public final Byte512Mask test(Test op) {
        return super.testTemplate(Byte512Mask.class, op);  // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Byte512Mask compare(Comparison op, Vector<Byte> v) {
        return super.compareTemplate(Byte512Mask.class, op, v);  // specialize
    }

    @Override
    @ForceInline
    public final Byte512Mask compare(Comparison op, byte s) {
        return super.compareTemplate(Byte512Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public final Byte512Mask compare(Comparison op, long s) {
        return super.compareTemplate(Byte512Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
        return (Byte512Vector)
            super.blendTemplate(Byte512Mask.class,
                                (Byte512Vector) v,
                                (Byte512Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector slice(int origin, Vector<Byte> v) {
        return (Byte512Vector) super.sliceTemplate(origin, v);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte512Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte512Vector unslice(int origin, Vector<Byte> w, int part) {
        return (Byte512Vector) super.unsliceTemplate(origin, w, part);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
        return (Byte512Vector)
            super.unsliceTemplate(Byte512Mask.class,
                                  origin, w, part,
                                  (Byte512Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte512Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte512Vector rearrange(VectorShuffle<Byte> s) {
        return (Byte512Vector)
            super.rearrangeTemplate(Byte512Shuffle.class,
                                    (Byte512Shuffle) s);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector rearrange(VectorShuffle<Byte> shuffle,
                                   VectorMask<Byte> m) {
        return (Byte512Vector)
            super.rearrangeTemplate(Byte512Shuffle.class,
                                    (Byte512Shuffle) shuffle,
                                    (Byte512Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector rearrange(VectorShuffle<Byte> s,
                                   Vector<Byte> v) {
        return (Byte512Vector)
            super.rearrangeTemplate(Byte512Shuffle.class,
                                    (Byte512Shuffle) s,
                                    (Byte512Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector selectFrom(Vector<Byte> v) {
        return (Byte512Vector)
            super.selectFromTemplate((Byte512Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Byte512Vector selectFrom(Vector<Byte> v,
                                    VectorMask<Byte> m) {
        return (Byte512Vector)
            super.selectFromTemplate((Byte512Vector) v,
                                     (Byte512Mask) m);  // specialize
    }


    @ForceInline
    @Override
    public byte lane(int i) {
        switch(i) {
            case 0: return laneHelper(0);
            case 1: return laneHelper(1);
            case 2: return laneHelper(2);
            case 3: return laneHelper(3);
            case 4: return laneHelper(4);
            case 5: return laneHelper(5);
            case 6: return laneHelper(6);
            case 7: return laneHelper(7);
            case 8: return laneHelper(8);
            case 9: return laneHelper(9);
            case 10: return laneHelper(10);
            case 11: return laneHelper(11);
            case 12: return laneHelper(12);
            case 13: return laneHelper(13);
            case 14: return laneHelper(14);
            case 15: return laneHelper(15);
            case 16: return laneHelper(16);
            case 17: return laneHelper(17);
            case 18: return laneHelper(18);
            case 19: return laneHelper(19);
            case 20: return laneHelper(20);
            case 21: return laneHelper(21);
            case 22: return laneHelper(22);
            case 23: return laneHelper(23);
            case 24: return laneHelper(24);
            case 25: return laneHelper(25);
            case 26: return laneHelper(26);
            case 27: return laneHelper(27);
            case 28: return laneHelper(28);
            case 29: return laneHelper(29);
            case 30: return laneHelper(30);
            case 31: return laneHelper(31);
            case 32: return laneHelper(32);
            case 33: return laneHelper(33);
            case 34: return laneHelper(34);
            case 35: return laneHelper(35);
            case 36: return laneHelper(36);
            case 37: return laneHelper(37);
            case 38: return laneHelper(38);
            case 39: return laneHelper(39);
            case 40: return laneHelper(40);
            case 41: return laneHelper(41);
            case 42: return laneHelper(42);
            case 43: return laneHelper(43);
            case 44: return laneHelper(44);
            case 45: return laneHelper(45);
            case 46: return laneHelper(46);
            case 47: return laneHelper(47);
            case 48: return laneHelper(48);
            case 49: return laneHelper(49);
            case 50: return laneHelper(50);
            case 51: return laneHelper(51);
            case 52: return laneHelper(52);
            case 53: return laneHelper(53);
            case 54: return laneHelper(54);
            case 55: return laneHelper(55);
            case 56: return laneHelper(56);
            case 57: return laneHelper(57);
            case 58: return laneHelper(58);
            case 59: return laneHelper(59);
            case 60: return laneHelper(60);
            case 61: return laneHelper(61);
            case 62: return laneHelper(62);
            case 63: return laneHelper(63);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public byte laneHelper(int i) {
        return (byte) VectorSupport.extract(
                        VCLASS, ETYPE, VLENGTH,
                        this, i,
                        (vec, ix) -> {
                            byte[] vecarr = vec.vec();
                            return (long)vecarr[ix];
                        });
    }

    @ForceInline
    @Override
    public Byte512Vector withLane(int i, byte e) {
        switch (i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            case 4: return withLaneHelper(4, e);
            case 5: return withLaneHelper(5, e);
            case 6: return withLaneHelper(6, e);
            case 7: return withLaneHelper(7, e);
            case 8: return withLaneHelper(8, e);
            case 9: return withLaneHelper(9, e);
            case 10: return withLaneHelper(10, e);
            case 11: return withLaneHelper(11, e);
            case 12: return withLaneHelper(12, e);
            case 13: return withLaneHelper(13, e);
            case 14: return withLaneHelper(14, e);
            case 15: return withLaneHelper(15, e);
            case 16: return withLaneHelper(16, e);
            case 17: return withLaneHelper(17, e);
            case 18: return withLaneHelper(18, e);
            case 19: return withLaneHelper(19, e);
            case 20: return withLaneHelper(20, e);
            case 21: return withLaneHelper(21, e);
            case 22: return withLaneHelper(22, e);
            case 23: return withLaneHelper(23, e);
            case 24: return withLaneHelper(24, e);
            case 25: return withLaneHelper(25, e);
            case 26: return withLaneHelper(26, e);
            case 27: return withLaneHelper(27, e);
            case 28: return withLaneHelper(28, e);
            case 29: return withLaneHelper(29, e);
            case 30: return withLaneHelper(30, e);
            case 31: return withLaneHelper(31, e);
            case 32: return withLaneHelper(32, e);
            case 33: return withLaneHelper(33, e);
            case 34: return withLaneHelper(34, e);
            case 35: return withLaneHelper(35, e);
            case 36: return withLaneHelper(36, e);
            case 37: return withLaneHelper(37, e);
            case 38: return withLaneHelper(38, e);
            case 39: return withLaneHelper(39, e);
            case 40: return withLaneHelper(40, e);
            case 41: return withLaneHelper(41, e);
            case 42: return withLaneHelper(42, e);
            case 43: return withLaneHelper(43, e);
            case 44: return withLaneHelper(44, e);
            case 45: return withLaneHelper(45, e);
            case 46: return withLaneHelper(46, e);
            case 47: return withLaneHelper(47, e);
            case 48: return withLaneHelper(48, e);
            case 49: return withLaneHelper(49, e);
            case 50: return withLaneHelper(50, e);
            case 51: return withLaneHelper(51, e);
            case 52: return withLaneHelper(52, e);
            case 53: return withLaneHelper(53, e);
            case 54: return withLaneHelper(54, e);
            case 55: return withLaneHelper(55, e);
            case 56: return withLaneHelper(56, e);
            case 57: return withLaneHelper(57, e);
            case 58: return withLaneHelper(58, e);
            case 59: return withLaneHelper(59, e);
            case 60: return withLaneHelper(60, e);
            case 61: return withLaneHelper(61, e);
            case 62: return withLaneHelper(62, e);
            case 63: return withLaneHelper(63, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Byte512Vector withLaneHelper(int i, byte e) {
        return VectorSupport.insert(
                        VCLASS, ETYPE, VLENGTH,
                        this, i, (long)e,
                        (v, ix, bits) -> {
                            byte[] res = v.vec().clone();
                            res[ix] = (byte)bits;
                            return v.vectorFactory(res);
                        });
    }

    // Mask

    static final class Byte512Mask extends AbstractMask<Byte> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte512Mask(boolean[] bits) {
            this(bits, 0);
        }

        Byte512Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Byte512Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public ByteSpecies vspecies() {
            // ISSUE: This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Byte512Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Byte512Mask(res);
        }

        @Override
        Byte512Mask bOp(VectorMask<Byte> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Byte512Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Byte512Mask(res);
        }

        @ForceInline
        @Override
        public final
        Byte512Vector toVector() {
            return (Byte512Vector) super.toVectorTemplate();  // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte512Vector.Byte512Mask(maskArray).check(species);
            case LaneType.SK_SHORT:
                return new Short512Vector.Short512Mask(maskArray).check(species);
            case LaneType.SK_INT:
                return new Int512Vector.Int512Mask(maskArray).check(species);
            case LaneType.SK_LONG:
                return new Long512Vector.Long512Mask(maskArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float512Vector.Float512Mask(maskArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double512Vector.Double512Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Byte512Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public Byte512Mask and(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte512Mask m = (Byte512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Byte512Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Byte512Mask or(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte512Mask m = (Byte512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Byte512Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Byte512Mask xor(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte512Mask m = (Byte512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte512Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Byte512Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Byte512Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Byte512Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Byte512Mask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static Byte512Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Byte512Mask.class, byte.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Byte512Mask  TRUE_MASK = new Byte512Mask(true);
        private static final Byte512Mask FALSE_MASK = new Byte512Mask(false);

    }

    // Shuffle

    static final class Byte512Shuffle extends AbstractShuffle<Byte> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte512Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte512Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte512Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Byte512Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public ByteSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Byte512Shuffle IOTA = new Byte512Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Byte512Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte512Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Byte512Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte512Vector.Byte512Shuffle(shuffleArray).check(species);
            case LaneType.SK_SHORT:
                return new Short512Vector.Short512Shuffle(shuffleArray).check(species);
            case LaneType.SK_INT:
                return new Int512Vector.Int512Shuffle(shuffleArray).check(species);
            case LaneType.SK_LONG:
                return new Long512Vector.Long512Shuffle(shuffleArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float512Vector.Float512Shuffle(shuffleArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double512Vector.Double512Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Byte512Shuffle rearrange(VectorShuffle<Byte> shuffle) {
            Byte512Shuffle s = (Byte512Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi];  // throws on exceptional index
            }
            return new Byte512Shuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    ByteVector fromArray0(byte[] a, int offset) {
        return super.fromArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(byte[] a, int offset) {
        super.intoArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset);  // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
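
A short usage sketch of the specialized comparisons, mask queries, and lane reductions defined in this file (illustration only, not part of this commit; the names are hypothetical, and SPECIES_512 is always defined but only hardware-accelerated on CPUs with 512-bit vector registers):

    import jdk.incubator.vector.ByteVector;
    import jdk.incubator.vector.VectorMask;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class MaskReduceDemo {
        static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_512;

        public static void main(String[] args) {
            ByteVector v = ByteVector.broadcast(SPECIES, (byte) 2);
            // 64 lanes of 2 sum to 128, which wraps to -128 in a byte.
            byte sum = v.reduceLanes(VectorOperators.ADD);
            VectorMask<Byte> m = v.compare(VectorOperators.GT, (byte) 0);
            System.out.println(sum + " " + m.anyTrue() + " " + m.allTrue());
        }
    }
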
@ -0,0 +1,824 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast")  // warning: redundant cast
final class Byte64Vector extends ByteVector {
    static final ByteSpecies VSPECIES =
        (ByteSpecies) ByteVector.SPECIES_64;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Byte64Vector> VCLASS = Byte64Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Byte> ETYPE = byte.class; // used by the JVM

    Byte64Vector(byte[] v) {
        super(v);
    }

    // For compatibility as Byte64Vector::new,
    // stored into species.vectorFactory.
    Byte64Vector(Object v) {
        this((byte[]) v);
    }

    static final Byte64Vector ZERO = new Byte64Vector(new byte[VLENGTH]);
    static final Byte64Vector IOTA = new Byte64Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public ByteSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Byte> elementType() { return byte.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Byte.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    byte[] vec() {
        return (byte[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Byte64Vector broadcast(byte e) {
        return (Byte64Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    public final Byte64Vector broadcast(long e) {
        return (Byte64Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    Byte64Mask maskFromArray(boolean[] bits) {
        return new Byte64Mask(bits);
    }

    @Override
    @ForceInline
    Byte64Shuffle iotaShuffle() { return Byte64Shuffle.IOTA; }

    @ForceInline
    Byte64Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }
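
    // Illustration only, not part of the generated file: with wrap == true
    // the lane indexes are reduced into [0, VLENGTH) via
    // wrapToRange(i*step + start, VLENGTH).  For this 8-lane species,
    // start = 3 and step = 1 yield the shuffle (3, 4, 5, 6, 7, 0, 1, 2),
    // which is the rotation that slice(3) applies before blending in zeros.
    // (The example values are hypothetical.)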
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Byte64Shuffle shuffleFromBytes(byte[] reorder) { return new Byte64Shuffle(reorder); }
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Byte64Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte64Shuffle(indexes, i); }
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Byte64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Byte64Shuffle(fn); }
|
||||
|
||||
// Make a vector of the same species but the given elements:
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector vectorFactory(byte[] vec) {
|
||||
return new Byte64Vector(vec);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector asByteVectorRaw() {
|
||||
return (Byte64Vector) super.asByteVectorRawTemplate(); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
AbstractVector<?> asVectorRaw(LaneType laneType) {
|
||||
return super.asVectorRawTemplate(laneType); // specialize
|
||||
}
|
||||
|
||||
// Unary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector uOp(FUnOp f) {
|
||||
return (Byte64Vector) super.uOpTemplate(f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector uOp(VectorMask<Byte> m, FUnOp f) {
|
||||
return (Byte64Vector)
|
||||
super.uOpTemplate((Byte64Mask)m, f); // specialize
|
||||
}
|
||||
|
||||
// Binary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector bOp(Vector<Byte> v, FBinOp f) {
|
||||
return (Byte64Vector) super.bOpTemplate((Byte64Vector)v, f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector bOp(Vector<Byte> v,
|
||||
VectorMask<Byte> m, FBinOp f) {
|
||||
return (Byte64Vector)
|
||||
super.bOpTemplate((Byte64Vector)v, (Byte64Mask)m,
|
||||
f); // specialize
|
||||
}
|
||||
|
||||
// Ternary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
|
||||
return (Byte64Vector)
|
||||
super.tOpTemplate((Byte64Vector)v1, (Byte64Vector)v2,
|
||||
f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte64Vector tOp(Vector<Byte> v1, Vector<Byte> v2,
|
||||
VectorMask<Byte> m, FTriOp f) {
|
||||
return (Byte64Vector)
|
||||
super.tOpTemplate((Byte64Vector)v1, (Byte64Vector)v2,
|
||||
(Byte64Mask)m, f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
byte rOp(byte v, FBinOp f) {
|
||||
return super.rOpTemplate(v, f); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final <F>
|
||||
Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
|
||||
VectorSpecies<F> rsp, int part) {
|
||||
return super.convertShapeTemplate(conv, rsp, part); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final <F>
|
||||
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
|
||||
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
|
||||
}
|
||||
|
||||
// Specialized algebraic operations:
|
||||
|
||||
// The following definition forces a specialized version of this
|
||||
// crucial method into the v-table of this class. A call to add()
|
||||
// will inline to a call to lanewise(ADD,), at which point the JIT
|
||||
// intrinsic will have the opcode of ADD, plus all the metadata
|
||||
// for this particular class, enabling it to generate precise
|
||||
// code.
|
||||
//
|
||||
// There is probably no benefit to the JIT to specialize the
|
||||
// masked or broadcast versions of the lanewise method.
|
||||
|
||||
    @Override
    @ForceInline
    public Byte64Vector lanewise(Unary op) {
        return (Byte64Vector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector lanewise(Binary op, Vector<Byte> v) {
        return (Byte64Vector) super.lanewiseTemplate(op, v); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline Byte64Vector
    lanewiseShift(VectorOperators.Binary op, int e) {
        return (Byte64Vector) super.lanewiseShiftTemplate(op, e); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline
    public final
    Byte64Vector
    lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
        return (Byte64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    Byte64Vector addIndex(int scale) {
        return (Byte64Vector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op,
                                  VectorMask<Byte> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Byte> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Byte> toShuffle() {
        byte[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Byte64Mask test(Test op) {
        return super.testTemplate(Byte64Mask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Byte64Mask compare(Comparison op, Vector<Byte> v) {
        return super.compareTemplate(Byte64Mask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final Byte64Mask compare(Comparison op, byte s) {
        return super.compareTemplate(Byte64Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final Byte64Mask compare(Comparison op, long s) {
        return super.compareTemplate(Byte64Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector blend(Vector<Byte> v, VectorMask<Byte> m) {
        return (Byte64Vector)
            super.blendTemplate(Byte64Mask.class,
                                (Byte64Vector) v,
                                (Byte64Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector slice(int origin, Vector<Byte> v) {
        return (Byte64Vector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte64Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

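    // For reference, slice(origin) shifts lanes toward lane 0 and
    // zero-fills the tail; e.g. for this 8-lane species (illustrative):
    //
    //   ByteVector v = ByteVector.fromArray(ByteVector.SPECIES_64,
    //       new byte[] {10, 11, 12, 13, 14, 15, 16, 17}, 0);
    //   v.slice(3);  // lanes: {13, 14, 15, 16, 17, 0, 0, 0}
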
    @Override
    @ForceInline
    public Byte64Vector unslice(int origin, Vector<Byte> w, int part) {
        return (Byte64Vector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
        return (Byte64Vector)
            super.unsliceTemplate(Byte64Mask.class,
                                  origin, w, part,
                                  (Byte64Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Byte64Shuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Byte64Vector rearrange(VectorShuffle<Byte> s) {
        return (Byte64Vector)
            super.rearrangeTemplate(Byte64Shuffle.class,
                                    (Byte64Shuffle) s); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector rearrange(VectorShuffle<Byte> shuffle,
                                  VectorMask<Byte> m) {
        return (Byte64Vector)
            super.rearrangeTemplate(Byte64Shuffle.class,
                                    (Byte64Shuffle) shuffle,
                                    (Byte64Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector rearrange(VectorShuffle<Byte> s,
                                  Vector<Byte> v) {
        return (Byte64Vector)
            super.rearrangeTemplate(Byte64Shuffle.class,
                                    (Byte64Shuffle) s,
                                    (Byte64Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector selectFrom(Vector<Byte> v) {
        return (Byte64Vector)
            super.selectFromTemplate((Byte64Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Byte64Vector selectFrom(Vector<Byte> v,
                                   VectorMask<Byte> m) {
        return (Byte64Vector)
            super.selectFromTemplate((Byte64Vector) v,
                                     (Byte64Mask) m); // specialize
    }


    @ForceInline
    @Override
    public byte lane(int i) {
        switch(i) {
            case 0: return laneHelper(0);
            case 1: return laneHelper(1);
            case 2: return laneHelper(2);
            case 3: return laneHelper(3);
            case 4: return laneHelper(4);
            case 5: return laneHelper(5);
            case 6: return laneHelper(6);
            case 7: return laneHelper(7);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public byte laneHelper(int i) {
        return (byte) VectorSupport.extract(
                  VCLASS, ETYPE, VLENGTH,
                  this, i,
                  (vec, ix) -> {
                      byte[] vecarr = vec.vec();
                      return (long)vecarr[ix];
                  });
    }

    @ForceInline
    @Override
    public Byte64Vector withLane(int i, byte e) {
        switch (i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            case 4: return withLaneHelper(4, e);
            case 5: return withLaneHelper(5, e);
            case 6: return withLaneHelper(6, e);
            case 7: return withLaneHelper(7, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Byte64Vector withLaneHelper(int i, byte e) {
        return VectorSupport.insert(
                   VCLASS, ETYPE, VLENGTH,
                   this, i, (long)e,
                   (v, ix, bits) -> {
                       byte[] res = v.vec().clone();
                       res[ix] = (byte)bits;
                       return v.vectorFactory(res);
                   });
    }

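    // Lane access round-trips each byte through the long-typed
    // extract/insert intrinsics above; at the user level this is simply
    // (illustrative):
    //
    //   byte e = v.lane(2);                        // read lane 2
    //   ByteVector v2 = v.withLane(2, (byte) 42);  // copy with lane 2 replaced
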
    // Mask

    static final class Byte64Mask extends AbstractMask<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte64Mask(boolean[] bits) {
            this(bits, 0);
        }

        Byte64Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Byte64Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public ByteSpecies vspecies() {
            // ISSUE: This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Byte64Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Byte64Mask(res);
        }

        @Override
        Byte64Mask bOp(VectorMask<Byte> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Byte64Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Byte64Mask(res);
        }

        @ForceInline
        @Override
        public final
        Byte64Vector toVector() {
            return (Byte64Vector) super.toVectorTemplate(); // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
                case LaneType.SK_BYTE:
                    return new Byte64Vector.Byte64Mask(maskArray).check(species);
                case LaneType.SK_SHORT:
                    return new Short64Vector.Short64Mask(maskArray).check(species);
                case LaneType.SK_INT:
                    return new Int64Vector.Int64Mask(maskArray).check(species);
                case LaneType.SK_LONG:
                    return new Long64Vector.Long64Mask(maskArray).check(species);
                case LaneType.SK_FLOAT:
                    return new Float64Vector.Float64Mask(maskArray).check(species);
                case LaneType.SK_DOUBLE:
                    return new Double64Vector.Double64Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Byte64Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public Byte64Mask and(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte64Mask m = (Byte64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Byte64Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Byte64Mask or(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte64Mask m = (Byte64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Byte64Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Byte64Mask xor(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            Byte64Mask m = (Byte64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Byte64Mask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Byte64Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Byte64Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Byte64Mask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Byte64Mask)m).getBits()));
        }

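        // These two reductions are the usual way back out of the masked
        // domain, e.g. (illustrative):
        //
        //   VectorMask<Byte> m = v.compare(VectorOperators.LT, (byte) 0);
        //   if (m.anyTrue()) { /* at least one negative lane */ }
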
        @ForceInline
        /*package-private*/
        static Byte64Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Byte64Mask.class, byte.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Byte64Mask TRUE_MASK = new Byte64Mask(true);
        private static final Byte64Mask FALSE_MASK = new Byte64Mask(false);

    }

    // Shuffle

    static final class Byte64Shuffle extends AbstractShuffle<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        Byte64Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte64Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Byte64Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Byte64Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public ByteSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Byte64Shuffle IOTA = new Byte64Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Byte64Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte64Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Byte64Vector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
                case LaneType.SK_BYTE:
                    return new Byte64Vector.Byte64Shuffle(shuffleArray).check(species);
                case LaneType.SK_SHORT:
                    return new Short64Vector.Short64Shuffle(shuffleArray).check(species);
                case LaneType.SK_INT:
                    return new Int64Vector.Int64Shuffle(shuffleArray).check(species);
                case LaneType.SK_LONG:
                    return new Long64Vector.Long64Shuffle(shuffleArray).check(species);
                case LaneType.SK_FLOAT:
                    return new Float64Vector.Float64Shuffle(shuffleArray).check(species);
                case LaneType.SK_DOUBLE:
                    return new Double64Vector.Double64Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Byte64Shuffle rearrange(VectorShuffle<Byte> shuffle) {
            Byte64Shuffle s = (Byte64Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi]; // throws on exceptional index
            }
            return new Byte64Shuffle(r);
        }
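
        // The loop above computes r[i] = reorder1[reorder2[i]], so for
        // any vector v the composed shuffle satisfies (illustrative):
        //
        //   v.rearrange(s1.rearrange(s2)) == v.rearrange(s1).rearrange(s2)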
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    ByteVector fromArray0(byte[] a, int offset) {
        return super.fromArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(byte[] a, int offset) {
        super.intoArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset); // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
@@ -0,0 +1,810 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast") // warning: redundant cast
final class ByteMaxVector extends ByteVector {
    static final ByteSpecies VSPECIES =
        (ByteSpecies) ByteVector.SPECIES_MAX;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<ByteMaxVector> VCLASS = ByteMaxVector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Byte> ETYPE = byte.class; // used by the JVM

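    // Note: unlike the fixed 64/128/256/512-bit specializations, this
    // class is bound to SPECIES_MAX, whose lane count is only known at
    // run time; portable code queries it (illustrative):
    //
    //   int lanes = ByteVector.SPECIES_MAX.length();  // platform-dependent
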
    ByteMaxVector(byte[] v) {
        super(v);
    }

    // For compatibility as ByteMaxVector::new,
    // stored into species.vectorFactory.
    ByteMaxVector(Object v) {
        this((byte[]) v);
    }

    static final ByteMaxVector ZERO = new ByteMaxVector(new byte[VLENGTH]);
    static final ByteMaxVector IOTA = new ByteMaxVector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public ByteSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Byte> elementType() { return byte.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Byte.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    byte[] vec() {
        return (byte[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final ByteMaxVector broadcast(byte e) {
        return (ByteMaxVector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    public final ByteMaxVector broadcast(long e) {
        return (ByteMaxVector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    ByteMaxMask maskFromArray(boolean[] bits) {
        return new ByteMaxMask(bits);
    }

    @Override
    @ForceInline
    ByteMaxShuffle iotaShuffle() { return ByteMaxShuffle.IOTA; }

    @ForceInline
    ByteMaxShuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

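    // With wrap == true the index i*step + start is reduced into
    // [0, VLENGTH); e.g. start = 2, step = 1 on a hypothetical 8-lane
    // species yields sources {2, 3, 4, 5, 6, 7, 0, 1} (illustrative).
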
    @Override
    @ForceInline
    ByteMaxShuffle shuffleFromBytes(byte[] reorder) { return new ByteMaxShuffle(reorder); }

    @Override
    @ForceInline
    ByteMaxShuffle shuffleFromArray(int[] indexes, int i) { return new ByteMaxShuffle(indexes, i); }

    @Override
    @ForceInline
    ByteMaxShuffle shuffleFromOp(IntUnaryOperator fn) { return new ByteMaxShuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    ByteMaxVector vectorFactory(byte[] vec) {
        return new ByteMaxVector(vec);
    }

    @ForceInline
    final @Override
    ByteMaxVector asByteVectorRaw() {
        return (ByteMaxVector) super.asByteVectorRawTemplate(); // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType); // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    ByteMaxVector uOp(FUnOp f) {
        return (ByteMaxVector) super.uOpTemplate(f); // specialize
    }

    @ForceInline
    final @Override
    ByteMaxVector uOp(VectorMask<Byte> m, FUnOp f) {
        return (ByteMaxVector)
            super.uOpTemplate((ByteMaxMask)m, f); // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    ByteMaxVector bOp(Vector<Byte> v, FBinOp f) {
        return (ByteMaxVector) super.bOpTemplate((ByteMaxVector)v, f); // specialize
    }

    @ForceInline
    final @Override
    ByteMaxVector bOp(Vector<Byte> v,
                      VectorMask<Byte> m, FBinOp f) {
        return (ByteMaxVector)
            super.bOpTemplate((ByteMaxVector)v, (ByteMaxMask)m,
                              f); // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    ByteMaxVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f) {
        return (ByteMaxVector)
            super.tOpTemplate((ByteMaxVector)v1, (ByteMaxVector)v2,
                              f); // specialize
    }

    @ForceInline
    final @Override
    ByteMaxVector tOp(Vector<Byte> v1, Vector<Byte> v2,
                      VectorMask<Byte> m, FTriOp f) {
        return (ByteMaxVector)
            super.tOpTemplate((ByteMaxVector)v1, (ByteMaxVector)v2,
                              (ByteMaxMask)m, f); // specialize
    }

    @ForceInline
    final @Override
    byte rOp(byte v, FBinOp f) {
        return super.rOpTemplate(v, f); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Byte,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD, v), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public ByteMaxVector lanewise(Unary op) {
        return (ByteMaxVector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector lanewise(Binary op, Vector<Byte> v) {
        return (ByteMaxVector) super.lanewiseTemplate(op, v); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline ByteMaxVector
    lanewiseShift(VectorOperators.Binary op, int e) {
        return (ByteMaxVector) super.lanewiseShiftTemplate(op, e); // specialize
    }

    /*package-private*/
    @Override
    @ForceInline
    public final
    ByteMaxVector
    lanewise(VectorOperators.Ternary op, Vector<Byte> v1, Vector<Byte> v2) {
        return (ByteMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    ByteMaxVector addIndex(int scale) {
        return (ByteMaxVector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final byte reduceLanes(VectorOperators.Associative op,
                                  VectorMask<Byte> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Byte> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Byte> toShuffle() {
        byte[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final ByteMaxMask test(Test op) {
        return super.testTemplate(ByteMaxMask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final ByteMaxMask compare(Comparison op, Vector<Byte> v) {
        return super.compareTemplate(ByteMaxMask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final ByteMaxMask compare(Comparison op, byte s) {
        return super.compareTemplate(ByteMaxMask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final ByteMaxMask compare(Comparison op, long s) {
        return super.compareTemplate(ByteMaxMask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector blend(Vector<Byte> v, VectorMask<Byte> m) {
        return (ByteMaxVector)
            super.blendTemplate(ByteMaxMask.class,
                                (ByteMaxVector) v,
                                (ByteMaxMask) m); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector slice(int origin, Vector<Byte> v) {
        return (ByteMaxVector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            ByteMaxShuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public ByteMaxVector unslice(int origin, Vector<Byte> w, int part) {
        return (ByteMaxVector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m) {
        return (ByteMaxVector)
            super.unsliceTemplate(ByteMaxMask.class,
                                  origin, w, part,
                                  (ByteMaxMask) m); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            ByteMaxShuffle Iota = iotaShuffle();
            VectorMask<Byte> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((byte)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public ByteMaxVector rearrange(VectorShuffle<Byte> s) {
        return (ByteMaxVector)
            super.rearrangeTemplate(ByteMaxShuffle.class,
                                    (ByteMaxShuffle) s); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector rearrange(VectorShuffle<Byte> shuffle,
                                   VectorMask<Byte> m) {
        return (ByteMaxVector)
            super.rearrangeTemplate(ByteMaxShuffle.class,
                                    (ByteMaxShuffle) shuffle,
                                    (ByteMaxMask) m); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector rearrange(VectorShuffle<Byte> s,
                                   Vector<Byte> v) {
        return (ByteMaxVector)
            super.rearrangeTemplate(ByteMaxShuffle.class,
                                    (ByteMaxShuffle) s,
                                    (ByteMaxVector) v); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector selectFrom(Vector<Byte> v) {
        return (ByteMaxVector)
            super.selectFromTemplate((ByteMaxVector) v); // specialize
    }

    @Override
    @ForceInline
    public ByteMaxVector selectFrom(Vector<Byte> v,
                                    VectorMask<Byte> m) {
        return (ByteMaxVector)
            super.selectFromTemplate((ByteMaxVector) v,
                                     (ByteMaxMask) m); // specialize
    }


    @ForceInline
    @Override
    public byte lane(int i) {
        if (i < 0 || i >= VLENGTH) {
            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return laneHelper(i);
    }
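
    // (The fixed-size specializations switch over constant lane numbers
    // instead; a plain range check is used here, presumably because
    // VLENGTH is not a compile-time constant for the max shape.)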

    public byte laneHelper(int i) {
        return (byte) VectorSupport.extract(
                  VCLASS, ETYPE, VLENGTH,
                  this, i,
                  (vec, ix) -> {
                      byte[] vecarr = vec.vec();
                      return (long)vecarr[ix];
                  });
    }

    @ForceInline
    @Override
    public ByteMaxVector withLane(int i, byte e) {
        if (i < 0 || i >= VLENGTH) {
            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return withLaneHelper(i, e);
    }

    public ByteMaxVector withLaneHelper(int i, byte e) {
        return VectorSupport.insert(
                   VCLASS, ETYPE, VLENGTH,
                   this, i, (long)e,
                   (v, ix, bits) -> {
                       byte[] res = v.vec().clone();
                       res[ix] = (byte)bits;
                       return v.vectorFactory(res);
                   });
    }

    // Mask

    static final class ByteMaxMask extends AbstractMask<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        ByteMaxMask(boolean[] bits) {
            this(bits, 0);
        }

        ByteMaxMask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        ByteMaxMask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public ByteSpecies vspecies() {
            // ISSUE: This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        ByteMaxMask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new ByteMaxMask(res);
        }

        @Override
        ByteMaxMask bOp(VectorMask<Byte> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((ByteMaxMask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new ByteMaxMask(res);
        }

        @ForceInline
        @Override
        public final
        ByteMaxVector toVector() {
            return (ByteMaxVector) super.toVectorTemplate(); // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
                case LaneType.SK_BYTE:
                    return new ByteMaxVector.ByteMaxMask(maskArray).check(species);
                case LaneType.SK_SHORT:
                    return new ShortMaxVector.ShortMaxMask(maskArray).check(species);
                case LaneType.SK_INT:
                    return new IntMaxVector.IntMaxMask(maskArray).check(species);
                case LaneType.SK_LONG:
                    return new LongMaxVector.LongMaxMask(maskArray).check(species);
                case LaneType.SK_FLOAT:
                    return new FloatMaxVector.FloatMaxMask(maskArray).check(species);
                case LaneType.SK_DOUBLE:
                    return new DoubleMaxVector.DoubleMaxMask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public ByteMaxMask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public ByteMaxMask and(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            ByteMaxMask m = (ByteMaxMask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, ByteMaxMask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public ByteMaxMask or(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            ByteMaxMask m = (ByteMaxMask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, ByteMaxMask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        ByteMaxMask xor(VectorMask<Byte> mask) {
            Objects.requireNonNull(mask);
            ByteMaxMask m = (ByteMaxMask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, ByteMaxMask.class, byte.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, ByteMaxMask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((ByteMaxMask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, ByteMaxMask.class, byte.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((ByteMaxMask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static ByteMaxMask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(ByteMaxMask.class, byte.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final ByteMaxMask TRUE_MASK = new ByteMaxMask(true);
        private static final ByteMaxMask FALSE_MASK = new ByteMaxMask(false);

    }

    // Shuffle

    static final class ByteMaxShuffle extends AbstractShuffle<Byte> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Byte> ETYPE = byte.class; // used by the JVM

        ByteMaxShuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public ByteMaxShuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public ByteMaxShuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public ByteMaxShuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public ByteSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final ByteMaxShuffle IOTA = new ByteMaxShuffle(IDENTITY);

        @Override
        @ForceInline
        public ByteMaxVector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, ByteMaxShuffle.class, this, VLENGTH,
                                                 (s) -> ((ByteMaxVector)(((AbstractShuffle<Byte>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
                case LaneType.SK_BYTE:
                    return new ByteMaxVector.ByteMaxShuffle(shuffleArray).check(species);
                case LaneType.SK_SHORT:
                    return new ShortMaxVector.ShortMaxShuffle(shuffleArray).check(species);
                case LaneType.SK_INT:
                    return new IntMaxVector.IntMaxShuffle(shuffleArray).check(species);
                case LaneType.SK_LONG:
                    return new LongMaxVector.LongMaxShuffle(shuffleArray).check(species);
                case LaneType.SK_FLOAT:
                    return new FloatMaxVector.FloatMaxShuffle(shuffleArray).check(species);
                case LaneType.SK_DOUBLE:
                    return new DoubleMaxVector.DoubleMaxShuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public ByteMaxShuffle rearrange(VectorShuffle<Byte> shuffle) {
            ByteMaxShuffle s = (ByteMaxShuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi]; // throws on exceptional index
            }
            return new ByteMaxShuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    ByteVector fromArray0(byte[] a, int offset) {
        return super.fromArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(byte[] a, int offset) {
        super.intoArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset); // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
File diff suppressed because it is too large
@@ -0,0 +1,808 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast") // warning: redundant cast
final class Double128Vector extends DoubleVector {
    static final DoubleSpecies VSPECIES =
        (DoubleSpecies) DoubleVector.SPECIES_128;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Double128Vector> VCLASS = Double128Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Double> ETYPE = double.class; // used by the JVM

    Double128Vector(double[] v) {
        super(v);
    }

    // For compatibility as Double128Vector::new,
    // stored into species.vectorFactory.
    Double128Vector(Object v) {
        this((double[]) v);
    }

    static final Double128Vector ZERO = new Double128Vector(new double[VLENGTH]);
    static final Double128Vector IOTA = new Double128Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public DoubleSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Double> elementType() { return double.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Double.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    double[] vec() {
        return (double[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Double128Vector broadcast(double e) {
        return (Double128Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    public final Double128Vector broadcast(long e) {
        return (Double128Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    Double128Mask maskFromArray(boolean[] bits) {
        return new Double128Mask(bits);
    }

    @Override
    @ForceInline
    Double128Shuffle iotaShuffle() { return Double128Shuffle.IOTA; }

    @ForceInline
    Double128Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

    @Override
    @ForceInline
    Double128Shuffle shuffleFromBytes(byte[] reorder) { return new Double128Shuffle(reorder); }

    @Override
    @ForceInline
    Double128Shuffle shuffleFromArray(int[] indexes, int i) { return new Double128Shuffle(indexes, i); }

    @Override
    @ForceInline
    Double128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double128Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Double128Vector vectorFactory(double[] vec) {
        return new Double128Vector(vec);
    }

    @ForceInline
    final @Override
    Byte128Vector asByteVectorRaw() {
        return (Byte128Vector) super.asByteVectorRawTemplate(); // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType); // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Double128Vector uOp(FUnOp f) {
        return (Double128Vector) super.uOpTemplate(f); // specialize
    }

    @ForceInline
    final @Override
    Double128Vector uOp(VectorMask<Double> m, FUnOp f) {
        return (Double128Vector)
            super.uOpTemplate((Double128Mask)m, f); // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Double128Vector bOp(Vector<Double> v, FBinOp f) {
        return (Double128Vector) super.bOpTemplate((Double128Vector)v, f); // specialize
    }

    @ForceInline
    final @Override
    Double128Vector bOp(Vector<Double> v,
                        VectorMask<Double> m, FBinOp f) {
        return (Double128Vector)
            super.bOpTemplate((Double128Vector)v, (Double128Mask)m,
                              f); // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Double128Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
        return (Double128Vector)
            super.tOpTemplate((Double128Vector)v1, (Double128Vector)v2,
                              f); // specialize
    }

    @ForceInline
    final @Override
    Double128Vector tOp(Vector<Double> v1, Vector<Double> v2,
                        VectorMask<Double> m, FTriOp f) {
        return (Double128Vector)
            super.tOpTemplate((Double128Vector)v1, (Double128Vector)v2,
                              (Double128Mask)m, f); // specialize
    }

    @ForceInline
    final @Override
    double rOp(double v, FBinOp f) {
        return super.rOpTemplate(v, f); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD, v), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Double128Vector lanewise(Unary op) {
        return (Double128Vector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector lanewise(Binary op, Vector<Double> v) {
        return (Double128Vector) super.lanewiseTemplate(op, v); // specialize
    }


    /*package-private*/
    @Override
    @ForceInline
    public final
    Double128Vector
    lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
        return (Double128Vector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    Double128Vector addIndex(int scale) {
        return (Double128Vector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op,
                                    VectorMask<Double> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Double> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Double> toShuffle() {
        double[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Double128Mask test(Test op) {
        return super.testTemplate(Double128Mask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Double128Mask compare(Comparison op, Vector<Double> v) {
        return super.compareTemplate(Double128Mask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final Double128Mask compare(Comparison op, double s) {
        return super.compareTemplate(Double128Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final Double128Mask compare(Comparison op, long s) {
        return super.compareTemplate(Double128Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector blend(Vector<Double> v, VectorMask<Double> m) {
        return (Double128Vector)
            super.blendTemplate(Double128Mask.class,
                                (Double128Vector) v,
                                (Double128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector slice(int origin, Vector<Double> v) {
        return (Double128Vector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double128Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Double128Vector unslice(int origin, Vector<Double> w, int part) {
        return (Double128Vector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
        return (Double128Vector)
            super.unsliceTemplate(Double128Mask.class,
                                  origin, w, part,
                                  (Double128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double128Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Double128Vector rearrange(VectorShuffle<Double> s) {
        return (Double128Vector)
            super.rearrangeTemplate(Double128Shuffle.class,
                                    (Double128Shuffle) s); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector rearrange(VectorShuffle<Double> shuffle,
                                     VectorMask<Double> m) {
        return (Double128Vector)
            super.rearrangeTemplate(Double128Shuffle.class,
                                    (Double128Shuffle) shuffle,
                                    (Double128Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector rearrange(VectorShuffle<Double> s,
                                     Vector<Double> v) {
        return (Double128Vector)
            super.rearrangeTemplate(Double128Shuffle.class,
                                    (Double128Shuffle) s,
                                    (Double128Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector selectFrom(Vector<Double> v) {
        return (Double128Vector)
            super.selectFromTemplate((Double128Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Double128Vector selectFrom(Vector<Double> v,
                                      VectorMask<Double> m) {
        return (Double128Vector)
            super.selectFromTemplate((Double128Vector) v,
                                     (Double128Mask) m); // specialize
    }


    @ForceInline
    @Override
    public double lane(int i) {
        long bits;
        switch(i) {
            case 0: bits = laneHelper(0); break;
            case 1: bits = laneHelper(1); break;
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return Double.longBitsToDouble(bits);
    }

    public long laneHelper(int i) {
        return (long) VectorSupport.extract(
                  VCLASS, ETYPE, VLENGTH,
                  this, i,
                  (vec, ix) -> {
                      double[] vecarr = vec.vec();
                      return (long)Double.doubleToLongBits(vecarr[ix]);
                  });
    }

    @ForceInline
    @Override
    public Double128Vector withLane(int i, double e) {
        switch(i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Double128Vector withLaneHelper(int i, double e) {
        return VectorSupport.insert(
                   VCLASS, ETYPE, VLENGTH,
                   this, i, (long)Double.doubleToLongBits(e),
                   (v, ix, bits) -> {
                       double[] res = v.vec().clone();
                       res[ix] = Double.longBitsToDouble((long)bits);
                       return v.vectorFactory(res);
                   });
    }
|
||||
// Mask
|
||||
|
||||
static final class Double128Mask extends AbstractMask<Double> {
|
||||
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
|
||||
static final Class<Double> ETYPE = double.class; // used by the JVM
|
||||
|
||||
Double128Mask(boolean[] bits) {
|
||||
this(bits, 0);
|
||||
}
|
||||
|
||||
Double128Mask(boolean[] bits, int offset) {
|
||||
super(prepare(bits, offset));
|
||||
}
|
||||
|
||||
Double128Mask(boolean val) {
|
||||
super(prepare(val));
|
||||
}
|
||||
|
||||
private static boolean[] prepare(boolean[] bits, int offset) {
|
||||
boolean[] newBits = new boolean[VSPECIES.laneCount()];
|
||||
for (int i = 0; i < newBits.length; i++) {
|
||||
newBits[i] = bits[offset + i];
|
||||
}
|
||||
return newBits;
|
||||
}
|
||||
|
||||
private static boolean[] prepare(boolean val) {
|
||||
boolean[] bits = new boolean[VSPECIES.laneCount()];
|
||||
Arrays.fill(bits, val);
|
||||
return bits;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
public DoubleSpecies vspecies() {
|
||||
// ISSUE: This should probably be a @Stable
|
||||
// field inside AbstractMask, rather than
|
||||
// a megamorphic method.
|
||||
return VSPECIES;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
boolean[] getBits() {
|
||||
return (boolean[])getPayload();
|
||||
}
|
||||
|
||||
@Override
|
||||
Double128Mask uOp(MUnOp f) {
|
||||
boolean[] res = new boolean[vspecies().laneCount()];
|
||||
boolean[] bits = getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(i, bits[i]);
|
||||
}
|
||||
return new Double128Mask(res);
|
||||
}
|
||||
|
||||
@Override
|
||||
Double128Mask bOp(VectorMask<Double> m, MBinOp f) {
|
||||
boolean[] res = new boolean[vspecies().laneCount()];
|
||||
boolean[] bits = getBits();
|
||||
boolean[] mbits = ((Double128Mask)m).getBits();
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
res[i] = f.apply(i, bits[i], mbits[i]);
|
||||
}
|
||||
return new Double128Mask(res);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final
|
||||
Double128Vector toVector() {
|
||||
return (Double128Vector) super.toVectorTemplate(); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public <E> VectorMask<E> cast(VectorSpecies<E> s) {
|
||||
AbstractSpecies<E> species = (AbstractSpecies<E>) s;
|
||||
if (length() != species.laneCount())
|
||||
throw new IllegalArgumentException("VectorMask length and species length differ");
|
||||
boolean[] maskArray = toArray();
|
||||
// enum-switches don't optimize properly JDK-8161245
|
||||
switch (species.laneType.switchKey) {
|
||||
case LaneType.SK_BYTE:
|
||||
return new Byte128Vector.Byte128Mask(maskArray).check(species);
|
||||
case LaneType.SK_SHORT:
|
||||
return new Short128Vector.Short128Mask(maskArray).check(species);
|
||||
case LaneType.SK_INT:
|
||||
return new Int128Vector.Int128Mask(maskArray).check(species);
|
||||
case LaneType.SK_LONG:
|
||||
return new Long128Vector.Long128Mask(maskArray).check(species);
|
||||
case LaneType.SK_FLOAT:
|
||||
return new Float128Vector.Float128Mask(maskArray).check(species);
|
||||
case LaneType.SK_DOUBLE:
|
||||
return new Double128Vector.Double128Mask(maskArray).check(species);
|
||||
}
|
||||
|
||||
// Should not reach here.
|
||||
throw new AssertionError(species);
|
||||
}
|
||||
|
||||
// Unary operations
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Mask not() {
|
||||
return xor(maskAll(true));
|
||||
}
|
||||
|
||||
// Binary operations
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Mask and(VectorMask<Double> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Double128Mask m = (Double128Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_AND, Double128Mask.class, long.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Mask or(VectorMask<Double> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Double128Mask m = (Double128Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_OR, Double128Mask.class, long.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
/* package-private */
|
||||
Double128Mask xor(VectorMask<Double> mask) {
|
||||
Objects.requireNonNull(mask);
|
||||
Double128Mask m = (Double128Mask)mask;
|
||||
return VectorSupport.binaryOp(VECTOR_OP_XOR, Double128Mask.class, long.class, VLENGTH,
|
||||
this, m,
|
||||
(m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
|
||||
}
|
||||
|
||||
// Reductions
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public boolean anyTrue() {
|
||||
return VectorSupport.test(BT_ne, Double128Mask.class, long.class, VLENGTH,
|
||||
this, vspecies().maskAll(true),
|
||||
(m, __) -> anyTrueHelper(((Double128Mask)m).getBits()));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public boolean allTrue() {
|
||||
return VectorSupport.test(BT_overflow, Double128Mask.class, long.class, VLENGTH,
|
||||
this, vspecies().maskAll(true),
|
||||
(m, __) -> allTrueHelper(((Double128Mask)m).getBits()));
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
/*package-private*/
|
||||
static Double128Mask maskAll(boolean bit) {
|
||||
return VectorSupport.broadcastCoerced(Double128Mask.class, long.class, VLENGTH,
|
||||
(bit ? -1 : 0), null,
|
||||
(v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
|
||||
}
|
||||
private static final Double128Mask TRUE_MASK = new Double128Mask(true);
|
||||
private static final Double128Mask FALSE_MASK = new Double128Mask(false);
|
||||
|
||||
}
|
||||
|
||||
// Shuffle
|
||||
|
||||
static final class Double128Shuffle extends AbstractShuffle<Double> {
|
||||
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
|
||||
static final Class<Double> ETYPE = double.class; // used by the JVM
|
||||
|
||||
Double128Shuffle(byte[] reorder) {
|
||||
super(VLENGTH, reorder);
|
||||
}
|
||||
|
||||
public Double128Shuffle(int[] reorder) {
|
||||
super(VLENGTH, reorder);
|
||||
}
|
||||
|
||||
public Double128Shuffle(int[] reorder, int i) {
|
||||
super(VLENGTH, reorder, i);
|
||||
}
|
||||
|
||||
public Double128Shuffle(IntUnaryOperator fn) {
|
||||
super(VLENGTH, fn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleSpecies vspecies() {
|
||||
return VSPECIES;
|
||||
}
|
||||
|
||||
static {
|
||||
// There must be enough bits in the shuffle lanes to encode
|
||||
// VLENGTH valid indexes and VLENGTH exceptional ones.
|
||||
assert(VLENGTH < Byte.MAX_VALUE);
|
||||
assert(Byte.MIN_VALUE <= -VLENGTH);
|
||||
}
|
||||
static final Double128Shuffle IOTA = new Double128Shuffle(IDENTITY);
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double128Vector toVector() {
|
||||
return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double128Shuffle.class, this, VLENGTH,
|
||||
(s) -> ((Double128Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
|
||||
AbstractSpecies<F> species = (AbstractSpecies<F>) s;
|
||||
if (length() != species.laneCount())
|
||||
throw new IllegalArgumentException("VectorShuffle length and species length differ");
|
||||
int[] shuffleArray = toArray();
|
||||
// enum-switches don't optimize properly JDK-8161245
|
||||
switch (species.laneType.switchKey) {
|
||||
case LaneType.SK_BYTE:
|
||||
return new Byte128Vector.Byte128Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_SHORT:
|
||||
return new Short128Vector.Short128Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_INT:
|
||||
return new Int128Vector.Int128Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_LONG:
|
||||
return new Long128Vector.Long128Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_FLOAT:
|
||||
return new Float128Vector.Float128Shuffle(shuffleArray).check(species);
|
||||
case LaneType.SK_DOUBLE:
|
||||
return new Double128Vector.Double128Shuffle(shuffleArray).check(species);
|
||||
}
|
||||
|
||||
// Should not reach here.
|
||||
throw new AssertionError(species);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public Double128Shuffle rearrange(VectorShuffle<Double> shuffle) {
|
||||
Double128Shuffle s = (Double128Shuffle) shuffle;
|
||||
byte[] reorder1 = reorder();
|
||||
byte[] reorder2 = s.reorder();
|
||||
byte[] r = new byte[reorder1.length];
|
||||
for (int i = 0; i < reorder1.length; i++) {
|
||||
int ssi = reorder2[i];
|
||||
r[i] = reorder1[ssi]; // throws on exceptional index
|
||||
}
|
||||
return new Double128Shuffle(r);
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
|
||||
// Specialized low-level memory operations.
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromArray0(double[] a, int offset) {
|
||||
return super.fromArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteArray0(byte[] a, int offset) {
|
||||
return super.fromByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
|
||||
return super.fromByteBuffer0Template(bb, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoArray0(double[] a, int offset) {
|
||||
super.intoArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
final
|
||||
void intoByteArray0(byte[] a, int offset) {
|
||||
super.intoByteArray0Template(a, offset); // specialize
|
||||
}
|
||||
|
||||
// End of specialized low-level memory operations.
|
||||
|
||||
// ================================================
|
||||
|
||||
}
|
||||
@ -0,0 +1,812 @@
|
||||
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast") // warning: redundant cast
final class Double256Vector extends DoubleVector {
    static final DoubleSpecies VSPECIES =
        (DoubleSpecies) DoubleVector.SPECIES_256;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Double256Vector> VCLASS = Double256Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Double> ETYPE = double.class; // used by the JVM

    Double256Vector(double[] v) {
        super(v);
    }

    // For compatibility as Double256Vector::new,
    // stored into species.vectorFactory.
    Double256Vector(Object v) {
        this((double[]) v);
    }

    static final Double256Vector ZERO = new Double256Vector(new double[VLENGTH]);
    static final Double256Vector IOTA = new Double256Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public DoubleSpecies vspecies() {
        // ISSUE: This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Double> elementType() { return double.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Double.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    double[] vec() {
        return (double[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Double256Vector broadcast(double e) {
        return (Double256Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    public final Double256Vector broadcast(long e) {
        return (Double256Vector) super.broadcastTemplate(e); // specialize
    }

    @Override
    @ForceInline
    Double256Mask maskFromArray(boolean[] bits) {
        return new Double256Mask(bits);
    }

    @Override
    @ForceInline
    Double256Shuffle iotaShuffle() { return Double256Shuffle.IOTA; }

    @ForceInline
    Double256Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

    @Override
    @ForceInline
    Double256Shuffle shuffleFromBytes(byte[] reorder) { return new Double256Shuffle(reorder); }

    @Override
    @ForceInline
    Double256Shuffle shuffleFromArray(int[] indexes, int i) { return new Double256Shuffle(indexes, i); }

    @Override
    @ForceInline
    Double256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double256Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Double256Vector vectorFactory(double[] vec) {
        return new Double256Vector(vec);
    }

    @ForceInline
    final @Override
    Byte256Vector asByteVectorRaw() {
        return (Byte256Vector) super.asByteVectorRawTemplate(); // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType); // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Double256Vector uOp(FUnOp f) {
        return (Double256Vector) super.uOpTemplate(f); // specialize
    }

    @ForceInline
    final @Override
    Double256Vector uOp(VectorMask<Double> m, FUnOp f) {
        return (Double256Vector)
            super.uOpTemplate((Double256Mask)m, f); // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Double256Vector bOp(Vector<Double> v, FBinOp f) {
        return (Double256Vector) super.bOpTemplate((Double256Vector)v, f); // specialize
    }

    @ForceInline
    final @Override
    Double256Vector bOp(Vector<Double> v,
                        VectorMask<Double> m, FBinOp f) {
        return (Double256Vector)
            super.bOpTemplate((Double256Vector)v, (Double256Mask)m,
                              f); // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Double256Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
        return (Double256Vector)
            super.tOpTemplate((Double256Vector)v1, (Double256Vector)v2,
                              f); // specialize
    }

    @ForceInline
    final @Override
    Double256Vector tOp(Vector<Double> v1, Vector<Double> v2,
                        VectorMask<Double> m, FTriOp f) {
        return (Double256Vector)
            super.tOpTemplate((Double256Vector)v1, (Double256Vector)v2,
                              (Double256Mask)m, f); // specialize
    }

    @ForceInline
    final @Override
    double rOp(double v, FBinOp f) {
        return super.rOpTemplate(v, f); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part); // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class. A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Double256Vector lanewise(Unary op) {
        return (Double256Vector) super.lanewiseTemplate(op); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector lanewise(Binary op, Vector<Double> v) {
        return (Double256Vector) super.lanewiseTemplate(op, v); // specialize
    }


    /*package-private*/
    @Override
    @ForceInline
    public final
    Double256Vector
    lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
        return (Double256Vector) super.lanewiseTemplate(op, v1, v2); // specialize
    }

    @Override
    @ForceInline
    public final
    Double256Vector addIndex(int scale) {
        return (Double256Vector) super.addIndexTemplate(scale); // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op,
                                    VectorMask<Double> m) {
        return super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op); // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Double> m) {
        return (long) super.reduceLanesTemplate(op, m); // specialized
    }

    @Override
    @ForceInline
    public VectorShuffle<Double> toShuffle() {
        double[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Double256Mask test(Test op) {
        return super.testTemplate(Double256Mask.class, op); // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Double256Mask compare(Comparison op, Vector<Double> v) {
        return super.compareTemplate(Double256Mask.class, op, v); // specialize
    }

    @Override
    @ForceInline
    public final Double256Mask compare(Comparison op, double s) {
        return super.compareTemplate(Double256Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public final Double256Mask compare(Comparison op, long s) {
        return super.compareTemplate(Double256Mask.class, op, s); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector blend(Vector<Double> v, VectorMask<Double> m) {
        return (Double256Vector)
            super.blendTemplate(Double256Mask.class,
                                (Double256Vector) v,
                                (Double256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector slice(int origin, Vector<Double> v) {
        return (Double256Vector) super.sliceTemplate(origin, v); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double256Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Double256Vector unslice(int origin, Vector<Double> w, int part) {
        return (Double256Vector) super.unsliceTemplate(origin, w, part); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
        return (Double256Vector)
            super.unsliceTemplate(Double256Mask.class,
                                  origin, w, part,
                                  (Double256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double256Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Double256Vector rearrange(VectorShuffle<Double> s) {
        return (Double256Vector)
            super.rearrangeTemplate(Double256Shuffle.class,
                                    (Double256Shuffle) s); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector rearrange(VectorShuffle<Double> shuffle,
                                     VectorMask<Double> m) {
        return (Double256Vector)
            super.rearrangeTemplate(Double256Shuffle.class,
                                    (Double256Shuffle) shuffle,
                                    (Double256Mask) m); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector rearrange(VectorShuffle<Double> s,
                                     Vector<Double> v) {
        return (Double256Vector)
            super.rearrangeTemplate(Double256Shuffle.class,
                                    (Double256Shuffle) s,
                                    (Double256Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector selectFrom(Vector<Double> v) {
        return (Double256Vector)
            super.selectFromTemplate((Double256Vector) v); // specialize
    }

    @Override
    @ForceInline
    public Double256Vector selectFrom(Vector<Double> v,
                                      VectorMask<Double> m) {
        return (Double256Vector)
            super.selectFromTemplate((Double256Vector) v,
                                     (Double256Mask) m); // specialize
    }


    @ForceInline
    @Override
    public double lane(int i) {
        long bits;
        switch(i) {
            case 0: bits = laneHelper(0); break;
            case 1: bits = laneHelper(1); break;
            case 2: bits = laneHelper(2); break;
            case 3: bits = laneHelper(3); break;
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return Double.longBitsToDouble(bits);
    }

    public long laneHelper(int i) {
        return (long) VectorSupport.extract(
                    VCLASS, ETYPE, VLENGTH,
                    this, i,
                    (vec, ix) -> {
                        double[] vecarr = vec.vec();
                        return (long)Double.doubleToLongBits(vecarr[ix]);
                    });
    }

    @ForceInline
    @Override
    public Double256Vector withLane(int i, double e) {
        switch(i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Double256Vector withLaneHelper(int i, double e) {
        return VectorSupport.insert(
                    VCLASS, ETYPE, VLENGTH,
                    this, i, (long)Double.doubleToLongBits(e),
                    (v, ix, bits) -> {
                        double[] res = v.vec().clone();
                        res[ix] = Double.longBitsToDouble((long)bits);
                        return v.vectorFactory(res);
                    });
    }

    // Mask

    static final class Double256Mask extends AbstractMask<Double> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double256Mask(boolean[] bits) {
            this(bits, 0);
        }

        Double256Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Double256Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public DoubleSpecies vspecies() {
            // ISSUE: This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Double256Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Double256Mask(res);
        }

        @Override
        Double256Mask bOp(VectorMask<Double> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Double256Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Double256Mask(res);
        }

        @ForceInline
        @Override
        public final
        Double256Vector toVector() {
            return (Double256Vector) super.toVectorTemplate(); // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte256Vector.Byte256Mask(maskArray).check(species);
            case LaneType.SK_SHORT:
                return new Short256Vector.Short256Mask(maskArray).check(species);
            case LaneType.SK_INT:
                return new Int256Vector.Int256Mask(maskArray).check(species);
            case LaneType.SK_LONG:
                return new Long256Vector.Long256Mask(maskArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float256Vector.Float256Mask(maskArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double256Vector.Double256Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Double256Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public Double256Mask and(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double256Mask m = (Double256Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Double256Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Double256Mask or(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double256Mask m = (Double256Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Double256Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Double256Mask xor(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double256Mask m = (Double256Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Double256Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Double256Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Double256Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Double256Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Double256Mask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static Double256Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Double256Mask.class, long.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Double256Mask TRUE_MASK = new Double256Mask(true);
        private static final Double256Mask FALSE_MASK = new Double256Mask(false);

    }

    // Shuffle

    static final class Double256Shuffle extends AbstractShuffle<Double> {
        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double256Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double256Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double256Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Double256Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public DoubleSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Double256Shuffle IOTA = new Double256Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Double256Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double256Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Double256Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte256Vector.Byte256Shuffle(shuffleArray).check(species);
            case LaneType.SK_SHORT:
                return new Short256Vector.Short256Shuffle(shuffleArray).check(species);
            case LaneType.SK_INT:
                return new Int256Vector.Int256Shuffle(shuffleArray).check(species);
            case LaneType.SK_LONG:
                return new Long256Vector.Long256Shuffle(shuffleArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float256Vector.Float256Shuffle(shuffleArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double256Vector.Double256Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Double256Shuffle rearrange(VectorShuffle<Double> shuffle) {
            Double256Shuffle s = (Double256Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi]; // throws on exceptional index
            }
            return new Double256Shuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    DoubleVector fromArray0(double[] a, int offset) {
        return super.fromArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(double[] a, int offset) {
        super.intoArray0Template(a, offset); // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset); // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
@ -0,0 +1,820 @@
/*
|
||||
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntUnaryOperator;
|
||||
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import jdk.internal.vm.vector.VectorSupport;
|
||||
|
||||
import static jdk.internal.vm.vector.VectorSupport.*;
|
||||
|
||||
import static jdk.incubator.vector.VectorOperators.*;
|
||||
|
||||
// -- This file was mechanically generated: Do not edit! -- //
|
||||
|
||||
@SuppressWarnings("cast") // warning: redundant cast
|
||||
final class Double512Vector extends DoubleVector {
|
||||
static final DoubleSpecies VSPECIES =
|
||||
(DoubleSpecies) DoubleVector.SPECIES_512;
|
||||
|
||||
static final VectorShape VSHAPE =
|
||||
VSPECIES.vectorShape();
|
||||
|
||||
static final Class<Double512Vector> VCLASS = Double512Vector.class;
|
||||
|
||||
static final int VSIZE = VSPECIES.vectorBitSize();
|
||||
|
||||
static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
|
||||
|
||||
static final Class<Double> ETYPE = double.class; // used by the JVM
|
||||
|
||||
Double512Vector(double[] v) {
|
||||
super(v);
|
||||
}
|
||||
|
||||
// For compatibility as Double512Vector::new,
|
||||
// stored into species.vectorFactory.
|
||||
Double512Vector(Object v) {
|
||||
this((double[]) v);
|
||||
}
|
||||
|
||||
static final Double512Vector ZERO = new Double512Vector(new double[VLENGTH]);
|
||||
static final Double512Vector IOTA = new Double512Vector(VSPECIES.iotaArray());
|
||||
|
||||
static {
|
||||
// Warm up a few species caches.
|
||||
// If we do this too much we will
|
||||
// get NPEs from bootstrap circularity.
|
||||
VSPECIES.dummyVector();
|
||||
VSPECIES.withLanes(LaneType.BYTE);
|
||||
}
|
||||
|
||||
// Specialized extractors
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
public DoubleSpecies vspecies() {
|
||||
// ISSUE: This should probably be a @Stable
|
||||
// field inside AbstractVector, rather than
|
||||
// a megamorphic method.
|
||||
return VSPECIES;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final Class<Double> elementType() { return double.class; }
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final int elementSize() { return Double.SIZE; }
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final VectorShape shape() { return VSHAPE; }
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final int length() { return VLENGTH; }
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final int bitSize() { return VSIZE; }
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public final int byteSize() { return VSIZE / Byte.SIZE; }
|
||||
|
||||
/*package-private*/
|
||||
@ForceInline
|
||||
final @Override
|
||||
double[] vec() {
|
||||
return (double[])getPayload();
|
||||
}
|
||||
|
||||
// Virtualized constructors
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Vector broadcast(double e) {
|
||||
return (Double512Vector) super.broadcastTemplate(e); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Vector broadcast(long e) {
|
||||
return (Double512Vector) super.broadcastTemplate(e); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Double512Mask maskFromArray(boolean[] bits) {
|
||||
return new Double512Mask(bits);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Double512Shuffle iotaShuffle() { return Double512Shuffle.IOTA; }
|
||||
|
||||
@ForceInline
|
||||
Double512Shuffle iotaShuffle(int start, int step, boolean wrap) {
|
||||
if (wrap) {
|
||||
return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
|
||||
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
|
||||
} else {
|
||||
return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
|
||||
(l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Double512Shuffle shuffleFromBytes(byte[] reorder) { return new Double512Shuffle(reorder); }
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Double512Shuffle shuffleFromArray(int[] indexes, int i) { return new Double512Shuffle(indexes, i); }
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
Double512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double512Shuffle(fn); }
|
||||
|
||||
// Make a vector of the same species but the given elements:
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector vectorFactory(double[] vec) {
|
||||
return new Double512Vector(vec);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Byte512Vector asByteVectorRaw() {
|
||||
return (Byte512Vector) super.asByteVectorRawTemplate(); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
AbstractVector<?> asVectorRaw(LaneType laneType) {
|
||||
return super.asVectorRawTemplate(laneType); // specialize
|
||||
}
|
||||
|
||||
// Unary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector uOp(FUnOp f) {
|
||||
return (Double512Vector) super.uOpTemplate(f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector uOp(VectorMask<Double> m, FUnOp f) {
|
||||
return (Double512Vector)
|
||||
super.uOpTemplate((Double512Mask)m, f); // specialize
|
||||
}
|
||||
|
||||
// Binary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector bOp(Vector<Double> v, FBinOp f) {
|
||||
return (Double512Vector) super.bOpTemplate((Double512Vector)v, f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector bOp(Vector<Double> v,
|
||||
VectorMask<Double> m, FBinOp f) {
|
||||
return (Double512Vector)
|
||||
super.bOpTemplate((Double512Vector)v, (Double512Mask)m,
|
||||
f); // specialize
|
||||
}
|
||||
|
||||
// Ternary operator
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
|
||||
return (Double512Vector)
|
||||
super.tOpTemplate((Double512Vector)v1, (Double512Vector)v2,
|
||||
f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
Double512Vector tOp(Vector<Double> v1, Vector<Double> v2,
|
||||
VectorMask<Double> m, FTriOp f) {
|
||||
return (Double512Vector)
|
||||
super.tOpTemplate((Double512Vector)v1, (Double512Vector)v2,
|
||||
(Double512Mask)m, f); // specialize
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
final @Override
|
||||
double rOp(double v, FBinOp f) {
|
||||
return super.rOpTemplate(v, f); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final <F>
|
||||
Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
|
||||
VectorSpecies<F> rsp, int part) {
|
||||
return super.convertShapeTemplate(conv, rsp, part); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final <F>
|
||||
Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
|
||||
return super.reinterpretShapeTemplate(toSpecies, part); // specialize
|
||||
}
|
||||
|
||||
// Specialized algebraic operations:
|
||||
|
||||
// The following definition forces a specialized version of this
|
||||
// crucial method into the v-table of this class. A call to add()
|
||||
// will inline to a call to lanewise(ADD,), at which point the JIT
|
||||
// intrinsic will have the opcode of ADD, plus all the metadata
|
||||
// for this particular class, enabling it to generate precise
|
||||
// code.
|
||||
//
|
||||
// There is probably no benefit to the JIT to specialize the
|
||||
// masked or broadcast versions of the lanewise method.
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector lanewise(Unary op) {
|
||||
return (Double512Vector) super.lanewiseTemplate(op); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector lanewise(Binary op, Vector<Double> v) {
|
||||
return (Double512Vector) super.lanewiseTemplate(op, v); // specialize
|
||||
}
|
||||
|
||||
|
||||
/*package-private*/
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
Double512Vector
|
||||
lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
|
||||
return (Double512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final
|
||||
Double512Vector addIndex(int scale) {
|
||||
return (Double512Vector) super.addIndexTemplate(scale); // specialize
|
||||
}
|
||||
|
||||
// Type specific horizontal reductions
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final double reduceLanes(VectorOperators.Associative op) {
|
||||
return super.reduceLanesTemplate(op); // specialized
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final double reduceLanes(VectorOperators.Associative op,
|
||||
VectorMask<Double> m) {
|
||||
return super.reduceLanesTemplate(op, m); // specialized
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final long reduceLanesToLong(VectorOperators.Associative op) {
|
||||
return (long) super.reduceLanesTemplate(op); // specialized
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final long reduceLanesToLong(VectorOperators.Associative op,
|
||||
VectorMask<Double> m) {
|
||||
return (long) super.reduceLanesTemplate(op, m); // specialized
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public VectorShuffle<Double> toShuffle() {
|
||||
double[] a = toArray();
|
||||
int[] sa = new int[a.length];
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
sa[i] = (int) a[i];
|
||||
}
|
||||
return VectorShuffle.fromArray(VSPECIES, sa, 0);
|
||||
}
|
||||
|
||||
// Specialized unary testing
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Mask test(Test op) {
|
||||
return super.testTemplate(Double512Mask.class, op); // specialize
|
||||
}
|
||||
|
||||
// Specialized comparisons
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Mask compare(Comparison op, Vector<Double> v) {
|
||||
return super.compareTemplate(Double512Mask.class, op, v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Mask compare(Comparison op, double s) {
|
||||
return super.compareTemplate(Double512Mask.class, op, s); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public final Double512Mask compare(Comparison op, long s) {
|
||||
return super.compareTemplate(Double512Mask.class, op, s); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector blend(Vector<Double> v, VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.blendTemplate(Double512Mask.class,
|
||||
(Double512Vector) v,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector slice(int origin, Vector<Double> v) {
|
||||
return (Double512Vector) super.sliceTemplate(origin, v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector slice(int origin) {
|
||||
if ((origin < 0) || (origin >= VLENGTH)) {
|
||||
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
|
||||
} else {
|
||||
Double512Shuffle Iota = iotaShuffle();
|
||||
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
|
||||
Iota = iotaShuffle(origin, 1, true);
|
||||
return ZERO.blend(this.rearrange(Iota), BlendMask);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector unslice(int origin, Vector<Double> w, int part) {
|
||||
return (Double512Vector) super.unsliceTemplate(origin, w, part); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.unsliceTemplate(Double512Mask.class,
|
||||
origin, w, part,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector unslice(int origin) {
|
||||
if ((origin < 0) || (origin >= VLENGTH)) {
|
||||
throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
|
||||
} else {
|
||||
Double512Shuffle Iota = iotaShuffle();
|
||||
VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
|
||||
Iota = iotaShuffle(-origin, 1, true);
|
||||
return ZERO.blend(this.rearrange(Iota), BlendMask);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector rearrange(VectorShuffle<Double> s) {
|
||||
return (Double512Vector)
|
||||
super.rearrangeTemplate(Double512Shuffle.class,
|
||||
(Double512Shuffle) s); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector rearrange(VectorShuffle<Double> shuffle,
|
||||
VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.rearrangeTemplate(Double512Shuffle.class,
|
||||
(Double512Shuffle) shuffle,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector rearrange(VectorShuffle<Double> s,
|
||||
Vector<Double> v) {
|
||||
return (Double512Vector)
|
||||
super.rearrangeTemplate(Double512Shuffle.class,
|
||||
(Double512Shuffle) s,
|
||||
(Double512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector selectFrom(Vector<Double> v) {
|
||||
return (Double512Vector)
|
||||
super.selectFromTemplate((Double512Vector) v); // specialize
|
||||
}
|
||||
|
||||
@Override
|
||||
@ForceInline
|
||||
public Double512Vector selectFrom(Vector<Double> v,
|
||||
VectorMask<Double> m) {
|
||||
return (Double512Vector)
|
||||
super.selectFromTemplate((Double512Vector) v,
|
||||
(Double512Mask) m); // specialize
|
||||
}
|
||||
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public double lane(int i) {
|
||||
long bits;
|
||||
switch(i) {
|
||||
case 0: bits = laneHelper(0); break;
|
||||
case 1: bits = laneHelper(1); break;
|
||||
case 2: bits = laneHelper(2); break;
|
||||
case 3: bits = laneHelper(3); break;
|
||||
case 4: bits = laneHelper(4); break;
|
||||
case 5: bits = laneHelper(5); break;
|
||||
case 6: bits = laneHelper(6); break;
|
||||
case 7: bits = laneHelper(7); break;
|
||||
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
|
||||
}
|
||||
return Double.longBitsToDouble(bits);
|
||||
}
|
||||
|
||||
public long laneHelper(int i) {
|
||||
return (long) VectorSupport.extract(
|
||||
VCLASS, ETYPE, VLENGTH,
|
||||
this, i,
|
||||
(vec, ix) -> {
|
||||
double[] vecarr = vec.vec();
|
||||
return (long)Double.doubleToLongBits(vecarr[ix]);
|
||||
});
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@Override
|
||||
public Double512Vector withLane(int i, double e) {
|
||||
switch(i) {
|
||||
case 0: return withLaneHelper(0, e);
|
||||
case 1: return withLaneHelper(1, e);
|
||||
case 2: return withLaneHelper(2, e);
|
||||
case 3: return withLaneHelper(3, e);
|
||||
case 4: return withLaneHelper(4, e);
|
||||
case 5: return withLaneHelper(5, e);
|
||||
case 6: return withLaneHelper(6, e);
|
||||
case 7: return withLaneHelper(7, e);
|
||||
default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
|
||||
}
|
||||
}
|
||||
|
||||
public Double512Vector withLaneHelper(int i, double e) {
|
||||
return VectorSupport.insert(
|
||||
VCLASS, ETYPE, VLENGTH,
|
||||
this, i, (long)Double.doubleToLongBits(e),
|
||||
(v, ix, bits) -> {
|
||||
double[] res = v.vec().clone();
|
||||
res[ix] = Double.longBitsToDouble((long)bits);
|
||||
return v.vectorFactory(res);
|
||||
});
|
||||
}
|
||||
|
||||
    // Mask

    static final class Double512Mask extends AbstractMask<Double> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double512Mask(boolean[] bits) {
            this(bits, 0);
        }

        Double512Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Double512Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public DoubleSpecies vspecies() {
            // ISSUE:  This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Double512Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Double512Mask(res);
        }

        @Override
        Double512Mask bOp(VectorMask<Double> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Double512Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Double512Mask(res);
        }

        @ForceInline
        @Override
        public final
        Double512Vector toVector() {
            return (Double512Vector) super.toVectorTemplate();  // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte512Vector.Byte512Mask(maskArray).check(species);
            case LaneType.SK_SHORT:
                return new Short512Vector.Short512Mask(maskArray).check(species);
            case LaneType.SK_INT:
                return new Int512Vector.Int512Mask(maskArray).check(species);
            case LaneType.SK_LONG:
                return new Long512Vector.Long512Mask(maskArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float512Vector.Float512Mask(maskArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double512Vector.Double512Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Double512Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public Double512Mask and(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double512Mask m = (Double512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Double512Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Double512Mask or(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double512Mask m = (Double512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Double512Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Double512Mask xor(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double512Mask m = (Double512Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Double512Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Double512Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Double512Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Double512Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Double512Mask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static Double512Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Double512Mask.class, long.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Double512Mask  TRUE_MASK = new Double512Mask(true);
        private static final Double512Mask FALSE_MASK = new Double512Mask(false);

    }

    // Shuffle

    static final class Double512Shuffle extends AbstractShuffle<Double> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double512Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double512Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double512Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Double512Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public DoubleSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Double512Shuffle IOTA = new Double512Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Double512Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double512Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Double512Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte512Vector.Byte512Shuffle(shuffleArray).check(species);
            case LaneType.SK_SHORT:
                return new Short512Vector.Short512Shuffle(shuffleArray).check(species);
            case LaneType.SK_INT:
                return new Int512Vector.Int512Shuffle(shuffleArray).check(species);
            case LaneType.SK_LONG:
                return new Long512Vector.Long512Shuffle(shuffleArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float512Vector.Float512Shuffle(shuffleArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double512Vector.Double512Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Double512Shuffle rearrange(VectorShuffle<Double> shuffle) {
            Double512Shuffle s = (Double512Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi];  // throws on exceptional index
            }
            return new Double512Shuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    DoubleVector fromArray0(double[] a, int offset) {
        return super.fromArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(double[] a, int offset) {
        super.intoArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset);  // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
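For orientation between the generated files, here is a minimal usage sketch of the mask machinery defined above. It is not part of the patch; the species, class name, and values are illustrative, and it assumes jdk.incubator.vector is on the module path.

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;

public class MaskSketch {
    public static void main(String[] args) {
        var species = DoubleVector.SPECIES_512;
        DoubleVector a = DoubleVector.broadcast(species, 1.0);
        DoubleVector b = DoubleVector.broadcast(species, 2.0);
        // compare() yields a Double512Mask under the hood; anyTrue()/allTrue()
        // are the intrinsified reductions defined in the class above.
        VectorMask<Double> m = a.compare(VectorOperators.LT, b);
        System.out.println(m.allTrue());   // true: every lane of a is < b
        // blend() consumes the mask: it picks lanes of b where the mask is set.
        DoubleVector c = a.blend(b, m);
        System.out.println(c.lane(0));     // 2.0
    }
}
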
@ -0,0 +1,806 @@
/*
 * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.incubator.vector;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntUnaryOperator;

import jdk.internal.vm.annotation.ForceInline;
import jdk.internal.vm.vector.VectorSupport;

import static jdk.internal.vm.vector.VectorSupport.*;

import static jdk.incubator.vector.VectorOperators.*;

// -- This file was mechanically generated: Do not edit! -- //

@SuppressWarnings("cast")  // warning: redundant cast
final class Double64Vector extends DoubleVector {
    static final DoubleSpecies VSPECIES =
        (DoubleSpecies) DoubleVector.SPECIES_64;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Double64Vector> VCLASS = Double64Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Double> ETYPE = double.class; // used by the JVM

    Double64Vector(double[] v) {
        super(v);
    }

    // For compatibility as Double64Vector::new,
    // stored into species.vectorFactory.
    Double64Vector(Object v) {
        this((double[]) v);
    }

    static final Double64Vector ZERO = new Double64Vector(new double[VLENGTH]);
    static final Double64Vector IOTA = new Double64Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public DoubleSpecies vspecies() {
        // ISSUE:  This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Double> elementType() { return double.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Double.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    /*package-private*/
    @ForceInline
    final @Override
    double[] vec() {
        return (double[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Double64Vector broadcast(double e) {
        return (Double64Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    public final Double64Vector broadcast(long e) {
        return (Double64Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    Double64Mask maskFromArray(boolean[] bits) {
        return new Double64Mask(bits);
    }

    @Override
    @ForceInline
    Double64Shuffle iotaShuffle() { return Double64Shuffle.IOTA; }

    @ForceInline
    Double64Shuffle iotaShuffle(int start, int step, boolean wrap) {
        if (wrap) {
            return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
        } else {
            return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
        }
    }

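    // Editorial note (not part of this patch): the two flavors above back the
    // public VectorShuffle.iota(species, start, step, wrapIndexes) factory.
    // With wrap == true each index i*step + start is reduced into
    // [0, VLENGTH), so for an illustrative two-lane species:
    //
    //   VectorShuffle<Double> s =
    //       VectorShuffle.iota(DoubleVector.SPECIES_128, 1, 1, true);
    //   // lane i reads source index (1 + i) wrapped into [0, 2): {1, 0}
    //
    // With wrap == false, out-of-range indexes are kept as "exceptional" lane
    // values and only fault when the shuffle is actually consumed.
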
    @Override
    @ForceInline
    Double64Shuffle shuffleFromBytes(byte[] reorder) { return new Double64Shuffle(reorder); }

    @Override
    @ForceInline
    Double64Shuffle shuffleFromArray(int[] indexes, int i) { return new Double64Shuffle(indexes, i); }

    @Override
    @ForceInline
    Double64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Double64Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Double64Vector vectorFactory(double[] vec) {
        return new Double64Vector(vec);
    }

    @ForceInline
    final @Override
    Byte64Vector asByteVectorRaw() {
        return (Byte64Vector) super.asByteVectorRawTemplate();  // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType);  // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Double64Vector uOp(FUnOp f) {
        return (Double64Vector) super.uOpTemplate(f);  // specialize
    }

    @ForceInline
    final @Override
    Double64Vector uOp(VectorMask<Double> m, FUnOp f) {
        return (Double64Vector)
            super.uOpTemplate((Double64Mask)m, f);  // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Double64Vector bOp(Vector<Double> v, FBinOp f) {
        return (Double64Vector) super.bOpTemplate((Double64Vector)v, f);  // specialize
    }

    @ForceInline
    final @Override
    Double64Vector bOp(Vector<Double> v,
                       VectorMask<Double> m, FBinOp f) {
        return (Double64Vector)
            super.bOpTemplate((Double64Vector)v, (Double64Mask)m,
                              f);  // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Double64Vector tOp(Vector<Double> v1, Vector<Double> v2, FTriOp f) {
        return (Double64Vector)
            super.tOpTemplate((Double64Vector)v1, (Double64Vector)v2,
                              f);  // specialize
    }

    @ForceInline
    final @Override
    Double64Vector tOp(Vector<Double> v1, Vector<Double> v2,
                       VectorMask<Double> m, FTriOp f) {
        return (Double64Vector)
            super.tOpTemplate((Double64Vector)v1, (Double64Vector)v2,
                              (Double64Mask)m, f);  // specialize
    }

    @ForceInline
    final @Override
    double rOp(double v, FBinOp f) {
        return super.rOpTemplate(v, f);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Double,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class.  A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Double64Vector lanewise(Unary op) {
        return (Double64Vector) super.lanewiseTemplate(op);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector lanewise(Binary op, Vector<Double> v) {
        return (Double64Vector) super.lanewiseTemplate(op, v);  // specialize
    }


    /*package-private*/
    @Override
    @ForceInline
    public final
    Double64Vector
    lanewise(VectorOperators.Ternary op, Vector<Double> v1, Vector<Double> v2) {
        return (Double64Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
    }

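    // Editorial note (not part of this patch): the v-table comment above can
    // be observed from user code.  A convenience call such as
    //
    //   DoubleVector sum = a.add(b);
    //
    // inlines to a.lanewise(VectorOperators.ADD, b), which lands on the
    // specialized override above, handing the JIT the ADD opcode together
    // with the concrete Double64Vector metadata it needs to emit exact code.
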
    @Override
    @ForceInline
    public final
    Double64Vector addIndex(int scale) {
        return (Double64Vector) super.addIndexTemplate(scale);  // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final double reduceLanes(VectorOperators.Associative op,
                                    VectorMask<Double> m) {
        return super.reduceLanesTemplate(op, m);  // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Double> m) {
        return (long) super.reduceLanesTemplate(op, m);  // specialized
    }

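    // Editorial note (not part of this patch): a usage sketch of the
    // reductions above, using an illustrative four-lane 256-bit species:
    //
    //   var s = DoubleVector.SPECIES_256;
    //   DoubleVector v = DoubleVector.fromArray(s, new double[]{1, 2, 3, 4}, 0);
    //   double sum = v.reduceLanes(VectorOperators.ADD);         // 10.0
    //   VectorMask<Double> even =
    //       VectorMask.fromValues(s, true, false, true, false);
    //   double part = v.reduceLanes(VectorOperators.ADD, even);  // 4.0
    //
    // The masked form simply skips the unset lanes before combining.
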
    @Override
    @ForceInline
    public VectorShuffle<Double> toShuffle() {
        double[] a = toArray();
        int[] sa = new int[a.length];
        for (int i = 0; i < a.length; i++) {
            sa[i] = (int) a[i];
        }
        return VectorShuffle.fromArray(VSPECIES, sa, 0);
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Double64Mask test(Test op) {
        return super.testTemplate(Double64Mask.class, op);  // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Double64Mask compare(Comparison op, Vector<Double> v) {
        return super.compareTemplate(Double64Mask.class, op, v);  // specialize
    }

    @Override
    @ForceInline
    public final Double64Mask compare(Comparison op, double s) {
        return super.compareTemplate(Double64Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public final Double64Mask compare(Comparison op, long s) {
        return super.compareTemplate(Double64Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector blend(Vector<Double> v, VectorMask<Double> m) {
        return (Double64Vector)
            super.blendTemplate(Double64Mask.class,
                                (Double64Vector) v,
                                (Double64Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector slice(int origin, Vector<Double> v) {
        return (Double64Vector) super.sliceTemplate(origin, v);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector slice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double64Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.LT, (broadcast((double)(VLENGTH-origin))));
            Iota = iotaShuffle(origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

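    // Editorial note (not part of this patch): slice(origin) above is built
    // from an iota shuffle plus a blend against ZERO, so lanes shift down by
    // origin and the vacated tail fills with zeros.  For an illustrative
    // four-lane species:
    //
    //   DoubleVector v = DoubleVector.fromArray(DoubleVector.SPECIES_256,
    //                                           new double[]{1, 2, 3, 4}, 0);
    //   v.slice(1);   // lanes become [2.0, 3.0, 4.0, 0.0]
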
    @Override
    @ForceInline
    public Double64Vector unslice(int origin, Vector<Double> w, int part) {
        return (Double64Vector) super.unsliceTemplate(origin, w, part);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector unslice(int origin, Vector<Double> w, int part, VectorMask<Double> m) {
        return (Double64Vector)
            super.unsliceTemplate(Double64Mask.class,
                                  origin, w, part,
                                  (Double64Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector unslice(int origin) {
        if ((origin < 0) || (origin >= VLENGTH)) {
            throw new ArrayIndexOutOfBoundsException("Index " + origin + " out of bounds for vector length " + VLENGTH);
        } else {
            Double64Shuffle Iota = iotaShuffle();
            VectorMask<Double> BlendMask = Iota.toVector().compare(VectorOperators.GE, (broadcast((double)(origin))));
            Iota = iotaShuffle(-origin, 1, true);
            return ZERO.blend(this.rearrange(Iota), BlendMask);
        }
    }

    @Override
    @ForceInline
    public Double64Vector rearrange(VectorShuffle<Double> s) {
        return (Double64Vector)
            super.rearrangeTemplate(Double64Shuffle.class,
                                    (Double64Shuffle) s);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector rearrange(VectorShuffle<Double> shuffle,
                                    VectorMask<Double> m) {
        return (Double64Vector)
            super.rearrangeTemplate(Double64Shuffle.class,
                                    (Double64Shuffle) shuffle,
                                    (Double64Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector rearrange(VectorShuffle<Double> s,
                                    Vector<Double> v) {
        return (Double64Vector)
            super.rearrangeTemplate(Double64Shuffle.class,
                                    (Double64Shuffle) s,
                                    (Double64Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector selectFrom(Vector<Double> v) {
        return (Double64Vector)
            super.selectFromTemplate((Double64Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Double64Vector selectFrom(Vector<Double> v,
                                     VectorMask<Double> m) {
        return (Double64Vector)
            super.selectFromTemplate((Double64Vector) v,
                                     (Double64Mask) m);  // specialize
    }


    @ForceInline
    @Override
    public double lane(int i) {
        long bits;
        switch(i) {
            case 0: bits = laneHelper(0); break;
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return Double.longBitsToDouble(bits);
    }

    public long laneHelper(int i) {
        return (long) VectorSupport.extract(
                        VCLASS, ETYPE, VLENGTH,
                        this, i,
                        (vec, ix) -> {
                            double[] vecarr = vec.vec();
                            return (long)Double.doubleToLongBits(vecarr[ix]);
                        });
    }

    @ForceInline
    @Override
    public Double64Vector withLane(int i, double e) {
        switch(i) {
            case 0: return withLaneHelper(0, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    public Double64Vector withLaneHelper(int i, double e) {
        return VectorSupport.insert(
                            VCLASS, ETYPE, VLENGTH,
                            this, i, (long)Double.doubleToLongBits(e),
                            (v, ix, bits) -> {
                                double[] res = v.vec().clone();
                                res[ix] = Double.longBitsToDouble((long)bits);
                                return v.vectorFactory(res);
                            });
    }

    // Mask

    static final class Double64Mask extends AbstractMask<Double> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double64Mask(boolean[] bits) {
            this(bits, 0);
        }

        Double64Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Double64Mask(boolean val) {
            super(prepare(val));
        }

        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public DoubleSpecies vspecies() {
            // ISSUE:  This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        @Override
        Double64Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Double64Mask(res);
        }

        @Override
        Double64Mask bOp(VectorMask<Double> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Double64Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Double64Mask(res);
        }

        @ForceInline
        @Override
        public final
        Double64Vector toVector() {
            return (Double64Vector) super.toVectorTemplate();  // specialize
        }

        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> s) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte64Vector.Byte64Mask(maskArray).check(species);
            case LaneType.SK_SHORT:
                return new Short64Vector.Short64Mask(maskArray).check(species);
            case LaneType.SK_INT:
                return new Int64Vector.Int64Mask(maskArray).check(species);
            case LaneType.SK_LONG:
                return new Long64Vector.Long64Mask(maskArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float64Vector.Float64Mask(maskArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double64Vector.Double64Mask(maskArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        // Unary operations

        @Override
        @ForceInline
        public Double64Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        @Override
        @ForceInline
        public Double64Mask and(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double64Mask m = (Double64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Double64Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Double64Mask or(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double64Mask m = (Double64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Double64Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Double64Mask xor(VectorMask<Double> mask) {
            Objects.requireNonNull(mask);
            Double64Mask m = (Double64Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Double64Mask.class, long.class, VLENGTH,
                                          this, m,
                                          (m1, m2) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Double64Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> anyTrueHelper(((Double64Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Double64Mask.class, long.class, VLENGTH,
                                      this, vspecies().maskAll(true),
                                      (m, __) -> allTrueHelper(((Double64Mask)m).getBits()));
        }

        @ForceInline
        /*package-private*/
        static Double64Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Double64Mask.class, long.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Double64Mask  TRUE_MASK = new Double64Mask(true);
        private static final Double64Mask FALSE_MASK = new Double64Mask(false);

    }

    // Shuffle

    static final class Double64Shuffle extends AbstractShuffle<Double> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Double> ETYPE = double.class; // used by the JVM

        Double64Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double64Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Double64Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Double64Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public DoubleSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Double64Shuffle IOTA = new Double64Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Double64Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double64Shuffle.class, this, VLENGTH,
                                                 (s) -> ((Double64Vector)(((AbstractShuffle<Double>)(s)).toVectorTemplate())));
        }

        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            // enum-switches don't optimize properly JDK-8161245
            switch (species.laneType.switchKey) {
            case LaneType.SK_BYTE:
                return new Byte64Vector.Byte64Shuffle(shuffleArray).check(species);
            case LaneType.SK_SHORT:
                return new Short64Vector.Short64Shuffle(shuffleArray).check(species);
            case LaneType.SK_INT:
                return new Int64Vector.Int64Shuffle(shuffleArray).check(species);
            case LaneType.SK_LONG:
                return new Long64Vector.Long64Shuffle(shuffleArray).check(species);
            case LaneType.SK_FLOAT:
                return new Float64Vector.Float64Shuffle(shuffleArray).check(species);
            case LaneType.SK_DOUBLE:
                return new Double64Vector.Double64Shuffle(shuffleArray).check(species);
            }

            // Should not reach here.
            throw new AssertionError(species);
        }

        @ForceInline
        @Override
        public Double64Shuffle rearrange(VectorShuffle<Double> shuffle) {
            Double64Shuffle s = (Double64Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi];  // throws on exceptional index
            }
            return new Double64Shuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    DoubleVector fromArray0(double[] a, int offset) {
        return super.fromArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    DoubleVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(double[] a, int offset) {
        super.intoArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset);  // specialize
    }

    // End of specialized low-level memory operations.

    // ================================================

}
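To close the section, an end-to-end sketch of how these shape-specialized classes are reached through the public, shape-generic API. It is not part of the patch; the class name, array contents, and the axpy helper are illustrative.

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;

public class AxpySketch {
    // The preferred species picks the widest shape the platform supports,
    // dispatching at runtime to one of the generated classes above.
    static final VectorSpecies<Double> S = DoubleVector.SPECIES_PREFERRED;

    // y[i] += a * x[i], vectorized with a scalar tail loop.
    static void axpy(double a, double[] x, double[] y) {
        int i = 0;
        int upper = S.loopBound(x.length);
        for (; i < upper; i += S.length()) {
            DoubleVector xv = DoubleVector.fromArray(S, x, i);
            DoubleVector yv = DoubleVector.fromArray(S, y, i);
            xv.fma(DoubleVector.broadcast(S, a), yv).intoArray(y, i);
        }
        for (; i < x.length; i++) {   // scalar tail for the leftover lanes
            y[i] += a * x[i];
        }
    }

    public static void main(String[] args) {
        double[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9};
        double[] y = new double[x.length];
        axpy(2.0, x, y);
        System.out.println(java.util.Arrays.toString(y)); // 2.0, 4.0, ..., 18.0
    }
}
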
Some files were not shown because too many files have changed in this diff.