Merge

2026-06-24 03:12:32 +00:00 · 2017-09-30 01:38:57 +00:00 · 2017-09-30 01:38:57 +00:00 · 87a8a4301f
commit 87a8a4301f
parent 12a11f2c65 b4347ad825
125 changed files with 5345 additions and 1169 deletions
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@ -113,6 +113,7 @@ PLATFORM_MODULES += \
    jdk.crypto.ec \
    jdk.dynalink \
    jdk.incubator.httpclient \
+    jdk.internal.vm.compiler.management \
    jdk.jsobject \
    jdk.localedata \
    jdk.naming.dns \
@ -215,6 +216,7 @@ endif

 ifeq ($(INCLUDE_GRAAL), false)
  MODULES_FILTER += jdk.internal.vm.compiler
+  MODULES_FILTER += jdk.internal.vm.compiler.management
 endif

 ################################################################################
--- a/make/gensrc/GensrcModuleLoaderMap.gmk
+++ b/make/gensrc/GensrcModuleLoaderMap.gmk
@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:

 GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java

-$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
-    $(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
-    $(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
-	$(MKDIR) -p $(@D)
-	$(RM) $@ $@.tmp
-	$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
-	    -platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
-	$(MV) $@.tmp $@
-
-GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
-
 ################################################################################
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@ -47,6 +47,9 @@ endif
 ifeq ($(call check-jvm-feature, zero), true)
  JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
  JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
+  ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
+    BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
+  endif
 endif

 ifeq ($(call check-jvm-feature, shark), true)
--- a/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java
+++ b/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java
@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
            throw new IllegalArgumentException(source + " not exist");
        }

-        boolean needsQuotes = outfile.toString().contains(".java.tmp");
-
        try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
             PrintWriter writer = new PrintWriter(bw)) {
            for (String line : Files.readAllLines(source)) {
                if (line.contains("@@BOOT_MODULE_NAMES@@")) {
-                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
+                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
                } else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
-                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
+                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
                }
                writer.println(line);
            }
        }
    }

-    private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
-        String mns = null;
-        if (needsQuotes) {
-            mns = stream.sorted()
-                .collect(Collectors.joining("\",\n            \""));
-        } else {
-            mns = stream.sorted()
-                .collect(Collectors.joining("\n"));
-        }
+    private static String patch(String s, String tag, Stream<String> stream) {
+        String mns = stream.sorted()
+            .collect(Collectors.joining("\",\n            \""));
        return s.replace(tag, mns);
    }

--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp
@ -2175,7 +2175,8 @@ class Assembler : public AbstractAssembler {
  inline void vsbox(       VectorRegister d, VectorRegister a);

  // SHA (introduced with Power 8)
-  // Not yet implemented.
+  inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
+  inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);

  // Vector Binary Polynomial Multiplication (introduced with Power 8)
  inline void vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b);
@ -2286,6 +2287,10 @@ class Assembler : public AbstractAssembler {
  inline void lvsl(  VectorRegister d, Register s2);
  inline void lvsr(  VectorRegister d, Register s2);

+  // Endianess specific concatenation of 2 loaded vectors.
+  inline void load_perm(VectorRegister perm, Register addr);
+  inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
+
  // RegisterOrConstant versions.
  // These emitters choose between the versions using two registers and
  // those with register and immediate, depending on the content of roc.
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@ -926,7 +926,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
 inline void Assembler::vsbox(       VectorRegister d, VectorRegister a)                   { emit_int32( VSBOX_OPCODE        | vrt(d) | vra(a)         ); }

 // SHA (introduced with Power 8)
-// Not yet implemented.
+inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
+inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }

 // Vector Binary Polynomial Multiplication (introduced with Power 8)
 inline void Assembler::vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@ -1035,6 +1036,22 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
 inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | rb(s2)); }
 inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | rb(s2)); }

+inline void Assembler::load_perm(VectorRegister perm, Register addr) {
+#if defined(VM_LITTLE_ENDIAN)
+  lvsr(perm, addr);
+#else
+  lvsl(perm, addr);
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
+#if defined(VM_LITTLE_ENDIAN)
+  vperm(first_dest, second, first_dest, perm);
+#else
+  vperm(first_dest, first_dest, second, perm);
+#endif
+}
+
 inline void Assembler::load_const(Register d, void* x, Register tmp) {
   load_const(d, (long)x, tmp);
 }
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@ -866,6 +866,40 @@ class MacroAssembler: public Assembler {
  void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                  bool invertCRC);

+  // SHA-2 auxiliary functions and public interfaces
+ private:
+  void sha256_deque(const VectorRegister src,
+      const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
+  void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
+  void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
+      const int total_ws, const Register k, const VectorRegister* kpws,
+      const int total_kpws);
+  void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
+      const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
+      const Register j, const Register k);
+  void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
+      const VectorRegister c, const VectorRegister d, const VectorRegister e,
+      const VectorRegister f, const VectorRegister g, const VectorRegister h,
+      const Register hptr);
+
+  void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
+  void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3,
+      const VectorRegister w4, const VectorRegister w5,
+      const VectorRegister w6, const VectorRegister w7,
+      const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
+      const VectorRegister vRb, const Register k);
+
+ public:
+  void sha256(bool multi_block);
+  void sha512(bool multi_block);
+
+
  //
  // Debugging
  //
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@ -3095,6 +3095,28 @@ class StubGenerator: public StubCodeGenerator {
     return start;
  }

+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha256 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha512 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
  void generate_arraycopy_stubs() {
    // Note: the disjoint stubs must be generated first, some of
    // the conjoint stubs use them.
@ -3781,6 +3803,14 @@ class StubGenerator: public StubCodeGenerator {
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
    }

+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
+    }
  }

 public:
--- a/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp
+++ b/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp
@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_

 enum platform_dependent_constants {
  code_size1 = 20000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 20000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 22000           // simply increase if too small (assembler will crash if too small)
 };

 // CRC32 Intrinsics.
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@ -113,7 +113,7 @@ void VM_Version::initialize() {
  // Create and print feature-string.
  char buf[(num_features+1) * 16]; // Max 16 chars per feature.
  jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               (has_fsqrt()   ? " fsqrt"   : ""),
               (has_isel()    ? " isel"    : ""),
               (has_lxarxeh() ? " lxarxeh" : ""),
@ -130,7 +130,8 @@ void VM_Version::initialize() {
               (has_mfdscr()  ? " mfdscr"  : ""),
               (has_vsx()     ? " vsx"     : ""),
               (has_ldbrx()   ? " ldbrx"   : ""),
-               (has_stdbrx()  ? " stdbrx"  : "")
+               (has_stdbrx()  ? " stdbrx"  : ""),
+               (has_vshasig() ? " sha"     : "")
               // Make sure number of %s matches num_features!
              );
  _features_string = os::strdup(buf);
@ -247,17 +248,43 @@ void VM_Version::initialize() {
    FLAG_SET_DEFAULT(UseFMA, true);
  }

-  if (UseSHA) {
-    warning("SHA instructions are not available on this CPU");
+  if (has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA)) {
+      UseSHA = true;
+    }
+  } else if (UseSHA) {
+    if (!FLAG_IS_DEFAULT(UseSHA))
+      warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }
-  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
-    warning("SHA intrinsics are not available on this CPU");
+
+  if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+    }
+  } else if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
@ -663,6 +690,7 @@ void VM_Version::determine_features() {
  a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
  a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
  a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
+  a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[17] -> vshasig
  a->blr();

  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -714,6 +742,7 @@ void VM_Version::determine_features() {
  if (code[feature_cntr++]) features |= vsx_m;
  if (code[feature_cntr++]) features |= ldbrx_m;
  if (code[feature_cntr++]) features |= stdbrx_m;
+  if (code[feature_cntr++]) features |= vshasig_m;

  // Print the detection code.
  if (PrintAssembly) {
--- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
@ -49,6 +49,7 @@ protected:
    vsx,
    ldbrx,
    stdbrx,
+    vshasig,
    num_features // last entry to count features
  };
  enum Feature_Flag_Set {
@ -64,6 +65,7 @@ protected:
    vand_m                = (1 << vand   ),
    lqarx_m               = (1 << lqarx  ),
    vcipher_m             = (1 << vcipher),
+    vshasig_m             = (1 << vshasig),
    vpmsumb_m             = (1 << vpmsumb),
    tcheck_m              = (1 << tcheck ),
    mfdscr_m              = (1 << mfdscr ),
@ -106,6 +108,7 @@ public:
  static bool has_vsx()     { return (_features & vsx_m) != 0; }
  static bool has_ldbrx()   { return (_features & ldbrx_m) != 0; }
  static bool has_stdbrx()  { return (_features & stdbrx_m) != 0; }
+  static bool has_vshasig() { return (_features & vshasig_m) != 0; }
  static bool has_mtfprd()  { return has_vpmsumb(); } // alias for P8

  // Assembler testing
--- a/src/hotspot/cpu/s390/assembler_s390.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
  bool is_RSform()  { return has_base() && !has_index() && is_disp12(); }
  bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
  bool is_RXform()  { return has_base() &&  has_index() && is_disp12(); }
-  bool is_RXEform() { return has_base() &&  has_index() && is_disp12(); }
  bool is_RXYform() { return has_base() &&  has_index() && is_disp20(); }

  bool uses(Register r) { return _base == r || _index == r; };
@ -1093,7 +1092,194 @@ class Assembler : public AbstractAssembler {
 #define TRTT_ZOPC   (unsigned  int)(0xb9 << 24 | 0x90 << 16)


-// Miscellaneous Operations
+//---------------------------
+//--  Vector Instructions  --
+//---------------------------
+
+//---<  Vector Support Instructions  >---
+
+//---  Load (memory)  ---
+
+#define VLM_ZOPC    (unsigned long)(0xe7L << 40 | 0x36L << 0)   // load full vreg range (n * 128 bit)
+#define VL_ZOPC     (unsigned long)(0xe7L << 40 | 0x06L << 0)   // load full vreg (128 bit)
+#define VLEB_ZOPC   (unsigned long)(0xe7L << 40 | 0x00L << 0)   // load vreg element (8 bit)
+#define VLEH_ZOPC   (unsigned long)(0xe7L << 40 | 0x01L << 0)   // load vreg element (16 bit)
+#define VLEF_ZOPC   (unsigned long)(0xe7L << 40 | 0x03L << 0)   // load vreg element (32 bit)
+#define VLEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x02L << 0)   // load vreg element (64 bit)
+
+#define VLREP_ZOPC  (unsigned long)(0xe7L << 40 | 0x05L << 0)   // load and replicate into all vector elements
+#define VLLEZ_ZOPC  (unsigned long)(0xe7L << 40 | 0x04L << 0)   // load logical element and zero.
+
+// vector register gather
+#define VGEF_ZOPC   (unsigned long)(0xe7L << 40 | 0x13L << 0)   // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
+#define VGEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x12L << 0)   // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
+// vector register scatter
+#define VSCEF_ZOPC  (unsigned long)(0xe7L << 40 | 0x1bL << 0)   // vector scatter element FW
+#define VSCEG_ZOPC  (unsigned long)(0xe7L << 40 | 0x1aL << 0)   // vector scatter element DW
+
+#define VLBB_ZOPC   (unsigned long)(0xe7L << 40 | 0x07L << 0)   // load vreg to block boundary (load to alignment).
+#define VLL_ZOPC    (unsigned long)(0xe7L << 40 | 0x37L << 0)   // load vreg with length.
+
+//---  Load (register)  ---
+
+#define VLR_ZOPC    (unsigned long)(0xe7L << 40 | 0x56L << 0)   // copy full vreg (128 bit)
+#define VLGV_ZOPC   (unsigned long)(0xe7L << 40 | 0x21L << 0)   // copy vreg element -> GR
+#define VLVG_ZOPC   (unsigned long)(0xe7L << 40 | 0x22L << 0)   // copy GR -> vreg element
+#define VLVGP_ZOPC  (unsigned long)(0xe7L << 40 | 0x62L << 0)   // copy GR2, GR3 (disjoint pair) -> vreg
+
+// vector register pack: cut in half the size the source vector elements
+#define VPK_ZOPC    (unsigned long)(0xe7L << 40 | 0x94L << 0)   // just cut
+#define VPKS_ZOPC   (unsigned long)(0xe7L << 40 | 0x97L << 0)   // saturate as signed values
+#define VPKLS_ZOPC  (unsigned long)(0xe7L << 40 | 0x95L << 0)   // saturate as unsigned values
+
+// vector register unpack: double in size the source vector elements
+#define VUPH_ZOPC   (unsigned long)(0xe7L << 40 | 0xd7L << 0)   // signed, left half of the source vector elements
+#define VUPLH_ZOPC  (unsigned long)(0xe7L << 40 | 0xd5L << 0)   // unsigned, left half of the source vector elements
+#define VUPL_ZOPC   (unsigned long)(0xe7L << 40 | 0xd6L << 0)   // signed, right half of the source vector elements
+#define VUPLL_ZOPC  (unsigned long)(0xe7L << 40 | 0xd4L << 0)   // unsigned, right half of the source vector element
+
+// vector register merge
+#define VMRH_ZOPC   (unsigned long)(0xe7L << 40 | 0x61L << 0)   // register merge high (left half of source registers)
+#define VMRL_ZOPC   (unsigned long)(0xe7L << 40 | 0x60L << 0)   // register merge low (right half of source registers)
+
+// vector register permute
+#define VPERM_ZOPC  (unsigned long)(0xe7L << 40 | 0x8cL << 0)   // vector permute
+#define VPDI_ZOPC   (unsigned long)(0xe7L << 40 | 0x84L << 0)   // vector permute DW immediate
+
+// vector register replicate
+#define VREP_ZOPC   (unsigned long)(0xe7L << 40 | 0x4dL << 0)   // vector replicate
+#define VREPI_ZOPC  (unsigned long)(0xe7L << 40 | 0x45L << 0)   // vector replicate immediate
+#define VSEL_ZOPC   (unsigned long)(0xe7L << 40 | 0x8dL << 0)   // vector select
+
+#define VSEG_ZOPC   (unsigned long)(0xe7L << 40 | 0x5fL << 0)   // vector sign-extend to DW (rightmost element in each DW).
+
+//---  Load (immediate)  ---
+
+#define VLEIB_ZOPC  (unsigned long)(0xe7L << 40 | 0x40L << 0)   // load vreg element (16 bit imm to 8 bit)
+#define VLEIH_ZOPC  (unsigned long)(0xe7L << 40 | 0x41L << 0)   // load vreg element (16 bit imm to 16 bit)
+#define VLEIF_ZOPC  (unsigned long)(0xe7L << 40 | 0x43L << 0)   // load vreg element (16 bit imm to 32 bit)
+#define VLEIG_ZOPC  (unsigned long)(0xe7L << 40 | 0x42L << 0)   // load vreg element (16 bit imm to 64 bit)
+
+//---  Store  ---
+
+#define VSTM_ZOPC   (unsigned long)(0xe7L << 40 | 0x3eL << 0)   // store full vreg range (n * 128 bit)
+#define VST_ZOPC    (unsigned long)(0xe7L << 40 | 0x0eL << 0)   // store full vreg (128 bit)
+#define VSTEB_ZOPC  (unsigned long)(0xe7L << 40 | 0x08L << 0)   // store vreg element (8 bit)
+#define VSTEH_ZOPC  (unsigned long)(0xe7L << 40 | 0x09L << 0)   // store vreg element (16 bit)
+#define VSTEF_ZOPC  (unsigned long)(0xe7L << 40 | 0x0bL << 0)   // store vreg element (32 bit)
+#define VSTEG_ZOPC  (unsigned long)(0xe7L << 40 | 0x0aL << 0)   // store vreg element (64 bit)
+#define VSTL_ZOPC   (unsigned long)(0xe7L << 40 | 0x3fL << 0)   // store vreg with length.
+
+//---  Misc  ---
+
+#define VGM_ZOPC    (unsigned long)(0xe7L << 40 | 0x46L << 0)   // generate bit  mask, [start..end] = '1', else '0'
+#define VGBM_ZOPC   (unsigned long)(0xe7L << 40 | 0x44L << 0)   // generate byte mask, bits(imm16) -> bytes
+
+//---<  Vector Arithmetic Instructions  >---
+
+// Load
+#define VLC_ZOPC    (unsigned long)(0xe7L << 40 | 0xdeL << 0)   // V1 := -V2,   element size = 2**m
+#define VLP_ZOPC    (unsigned long)(0xe7L << 40 | 0xdfL << 0)   // V1 := |V2|,  element size = 2**m
+
+// ADD
+#define VA_ZOPC     (unsigned long)(0xe7L << 40 | 0xf3L << 0)   // V1 := V2 + V3, element size = 2**m
+#define VACC_ZOPC   (unsigned long)(0xe7L << 40 | 0xf1L << 0)   // V1 := carry(V2 + V3), element size = 2**m
+
+// SUB
+#define VS_ZOPC     (unsigned long)(0xe7L << 40 | 0xf7L << 0)   // V1 := V2 - V3, element size = 2**m
+#define VSCBI_ZOPC  (unsigned long)(0xe7L << 40 | 0xf5L << 0)   // V1 := borrow(V2 - V3), element size = 2**m
+
+// MUL
+#define VML_ZOPC    (unsigned long)(0xe7L << 40 | 0xa2L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMH_ZOPC    (unsigned long)(0xe7L << 40 | 0xa3L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLH_ZOPC   (unsigned long)(0xe7L << 40 | 0xa1L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+#define VME_ZOPC    (unsigned long)(0xe7L << 40 | 0xa6L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLE_ZOPC   (unsigned long)(0xe7L << 40 | 0xa4L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+#define VMO_ZOPC    (unsigned long)(0xe7L << 40 | 0xa7L << 0)   // V1 := V2 * V3, element size = 2**m
+#define VMLO_ZOPC   (unsigned long)(0xe7L << 40 | 0xa5L << 0)   // V1 := V2 * V3, element size = 2**m, unsigned
+
+// MUL & ADD
+#define VMAL_ZOPC   (unsigned long)(0xe7L << 40 | 0xaaL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMAH_ZOPC   (unsigned long)(0xe7L << 40 | 0xabL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALH_ZOPC  (unsigned long)(0xe7L << 40 | 0xa9L << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+#define VMAE_ZOPC   (unsigned long)(0xe7L << 40 | 0xaeL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALE_ZOPC  (unsigned long)(0xe7L << 40 | 0xacL << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+#define VMAO_ZOPC   (unsigned long)(0xe7L << 40 | 0xafL << 0)   // V1 := V2 * V3 + V4, element size = 2**m
+#define VMALO_ZOPC  (unsigned long)(0xe7L << 40 | 0xadL << 0)   // V1 := V2 * V3 + V4, element size = 2**m, unsigned
+
+// Vector SUM
+#define VSUM_ZOPC   (unsigned long)(0xe7L << 40 | 0x64L << 0)   // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
+#define VSUMG_ZOPC  (unsigned long)(0xe7L << 40 | 0x65L << 0)   // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
+#define VSUMQ_ZOPC  (unsigned long)(0xe7L << 40 | 0x67L << 0)   // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
+
+// Average
+#define VAVG_ZOPC   (unsigned long)(0xe7L << 40 | 0xf2L << 0)   // V1 := (V2+V3+1)/2, signed,   element size = 2**m
+#define VAVGL_ZOPC  (unsigned long)(0xe7L << 40 | 0xf0L << 0)   // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
+
+// VECTOR Galois Field Multiply Sum
+#define VGFM_ZOPC   (unsigned long)(0xe7L << 40 | 0xb4L << 0)
+#define VGFMA_ZOPC  (unsigned long)(0xe7L << 40 | 0xbcL << 0)
+
+//---<  Vector Logical Instructions  >---
+
+// AND
+#define VN_ZOPC     (unsigned long)(0xe7L << 40 | 0x68L << 0)   // V1 := V2 & V3,  element size = 2**m
+#define VNC_ZOPC    (unsigned long)(0xe7L << 40 | 0x69L << 0)   // V1 := V2 & ~V3, element size = 2**m
+
+// XOR
+#define VX_ZOPC     (unsigned long)(0xe7L << 40 | 0x6dL << 0)   // V1 := V2 ^ V3,  element size = 2**m
+
+// NOR
+#define VNO_ZOPC    (unsigned long)(0xe7L << 40 | 0x6bL << 0)   // V1 := !(V2 | V3),  element size = 2**m
+
+// OR
+#define VO_ZOPC     (unsigned long)(0xe7L << 40 | 0x6aL << 0)   // V1 := V2 | V3,  element size = 2**m
+
+// Comparison (element-wise)
+#define VCEQ_ZOPC   (unsigned long)(0xe7L << 40 | 0xf8L << 0)   // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
+#define VCH_ZOPC    (unsigned long)(0xe7L << 40 | 0xfbL << 0)   // V1 := (V2  > V3) ? 0xffff : 0x0000, element size = 2**m, signed
+#define VCHL_ZOPC   (unsigned long)(0xe7L << 40 | 0xf9L << 0)   // V1 := (V2  > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned
+
+// Max/Min (element-wise)
+#define VMX_ZOPC    (unsigned long)(0xe7L << 40 | 0xffL << 0)   // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
+#define VMXL_ZOPC   (unsigned long)(0xe7L << 40 | 0xfdL << 0)   // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
+#define VMN_ZOPC    (unsigned long)(0xe7L << 40 | 0xfeL << 0)   // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
+#define VMNL_ZOPC   (unsigned long)(0xe7L << 40 | 0xfcL << 0)   // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
+
+// Leading/Trailing Zeros, population count
+#define VCLZ_ZOPC   (unsigned long)(0xe7L << 40 | 0x53L << 0)   // V1 := leadingzeros(V2),  element size = 2**m
+#define VCTZ_ZOPC   (unsigned long)(0xe7L << 40 | 0x52L << 0)   // V1 := trailingzeros(V2), element size = 2**m
+#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0)   // V1 := popcount(V2), bytewise!!
+
+// Rotate/Shift
+#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0)   // V1 := rotateleft(V2), rotate count in V3 element
+#define VERLL_ZOPC  (unsigned long)(0xe7L << 40 | 0x33L << 0)   // V1 := rotateleft(V3), rotate count from d2(b2).
+#define VERIM_ZOPC  (unsigned long)(0xe7L << 40 | 0x72L << 0)   // Rotate then insert under mask. Read Principles of Operation!!
+
+#define VESLV_ZOPC  (unsigned long)(0xe7L << 40 | 0x70L << 0)   // V1 := SLL(V2, V3), unsigned, element-wise
+#define VESL_ZOPC   (unsigned long)(0xe7L << 40 | 0x30L << 0)   // V1 := SLL(V3), unsigned, shift count from d2(b2).
+
+#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0)   // V1 := SRA(V2, V3), signed, element-wise
+#define VESRA_ZOPC  (unsigned long)(0xe7L << 40 | 0x3AL << 0)   // V1 := SRA(V3), signed, shift count from d2(b2).
+#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0)   // V1 := SRL(V2, V3), unsigned, element-wise
+#define VESRL_ZOPC  (unsigned long)(0xe7L << 40 | 0x38L << 0)   // V1 := SRL(V3), unsigned, shift count from d2(b2).
+
+#define VSL_ZOPC    (unsigned long)(0xe7L << 40 | 0x74L << 0)   // V1 := SLL(V2), unsigned, bit-count
+#define VSLB_ZOPC   (unsigned long)(0xe7L << 40 | 0x75L << 0)   // V1 := SLL(V2), unsigned, byte-count
+#define VSLDB_ZOPC  (unsigned long)(0xe7L << 40 | 0x77L << 0)   // V1 := SLL((V2,V3)), unsigned, byte-count
+
+#define VSRA_ZOPC   (unsigned long)(0xe7L << 40 | 0x7eL << 0)   // V1 := SRA(V2), signed, bit-count
+#define VSRAB_ZOPC  (unsigned long)(0xe7L << 40 | 0x7fL << 0)   // V1 := SRA(V2), signed, byte-count
+#define VSRL_ZOPC   (unsigned long)(0xe7L << 40 | 0x7cL << 0)   // V1 := SRL(V2), unsigned, bit-count
+#define VSRLB_ZOPC  (unsigned long)(0xe7L << 40 | 0x7dL << 0)   // V1 := SRL(V2), unsigned, byte-count
+
+// Test under Mask
+#define VTM_ZOPC    (unsigned long)(0xe7L << 40 | 0xd8L << 0)   // Like TM, set CC according to state of selected bits.
+
+
+//--------------------------------
+//--  Miscellaneous Operations  --
+//--------------------------------

 // Execute
 #define EX_ZOPC     (unsigned  int)(68L << 24)
@ -1280,6 +1466,21 @@ class Assembler : public AbstractAssembler {
    to_minus_infinity = 7
  };

+  // Vector Register Element Type.
+  enum VRegElemType {
+    VRET_BYTE   = 0,
+    VRET_HW     = 1,
+    VRET_FW     = 2,
+    VRET_DW     = 3,
+    VRET_QW     = 4
+  };
+
+  // Vector Operation Condition Code Control.
+  enum VOpCCC {
+    VOP_CCIGN   = 0, // ignore, don't set CC
+    VOP_CCSET   = 1  // set the CC
+  };
+
  // Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
  static branch_condition inverse_condition(branch_condition cc);
  static branch_condition inverse_float_condition(branch_condition cc);
@ -1376,6 +1577,60 @@ class Assembler : public AbstractAssembler {
    return r;
  }

+  static int64_t rsmask_48( Address a) { assert(a.is_RSform(),  "bad address format"); return rsmask_48( a.disp12(), a.base()); }
+  static int64_t rxmask_48( Address a) {      if (a.is_RXform())  { return rxmask_48( a.disp12(), a.index(), a.base()); }
+                                         else if (a.is_RSform())  { return rsmask_48( a.disp12(),            a.base()); }
+                                         else                     { guarantee(false, "bad address format");  return 0;  }
+                                       }
+  static int64_t rsymask_48(Address a) { assert(a.is_RSYform(), "bad address format"); return rsymask_48(a.disp20(), a.base()); }
+  static int64_t rxymask_48(Address a) {      if (a.is_RXYform()) { return rxymask_48( a.disp20(), a.index(), a.base()); }
+                                         else if (a.is_RSYform()) { return rsymask_48( a.disp20(),            a.base()); }
+                                         else                     { guarantee(false, "bad address format");  return 0;   }
+                                       }
+
+  static int64_t rsmask_48( int64_t d2, Register b2)              { return uimm12(d2, 20, 48)                   | regz(b2, 16, 48); }
+  static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
+  static int64_t rsymask_48(int64_t d2, Register b2)              { return simm20(d2)                           | regz(b2, 16, 48); }
+  static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2)         | reg(x2, 12, 48) | regz(b2, 16, 48); }
+
+  // Address calculated from d12(vx,b) - vx is vector index register.
+  static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }
+
+  static int64_t vreg_mask(VectorRegister v, int pos) {
+    return vreg(v, pos) | v->RXB_mask(pos);
+  }
+
+  // Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
+  static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
+    // min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
+    // max_size - maximum element size. Not all instructions support element sizes up to "QW".
+    assert((min_size <= size) && (size <= max_size), "element size control out of range");
+    return uimm4(size, pos, 48);
+  }
+
+  // Vector Element IndeX. 4-bit field which indexes the target vector element.
+  static int64_t veix_mask(int64_t ix, int el_size, int pos) {
+    // el_size - size of the vector element. This is a VRegElemType enum value.
+    // ix      - vector element index.
+    int max_ix = -1;
+    switch (el_size) {
+      case VRET_BYTE: max_ix = 15; break;
+      case VRET_HW:   max_ix =  7; break;
+      case VRET_FW:   max_ix =  3; break;
+      case VRET_DW:   max_ix =  1; break;
+      case VRET_QW:   max_ix =  0; break;
+      default:        guarantee(false, "bad vector element size %d", el_size); break;
+    }
+    assert((0 <= ix) && (ix <= max_ix), "element size out of range (0 <= %ld <= %d)", ix, max_ix);
+    return uimm4(ix, pos, 48);
+  }
+
+  // Vector Operation Condition Code Control. 4-bit field, one bit of which indicates if the condition code is to be set by the operation.
+  static int64_t vccc_mask(int64_t flag, int pos) {
+    assert((flag == VOP_CCIGN) || (flag == VOP_CCSET), "VCCC flag value out of range");
+    return uimm4(flag, pos, 48);
+  }
+
 public:

  //--------------------------------------------------
@ -1453,6 +1708,8 @@ class Assembler : public AbstractAssembler {
  static long imm24(int64_t i24, int s, int len)   { return imm(i24, 24) << (len-s-24); }
  static long imm32(int64_t i32, int s, int len)   { return imm(i32, 32) << (len-s-32); }

+  static long vreg(VectorRegister v, int pos)      { const int len = 48; return u_field(v->encoding()&0x0f, (len-pos)-1, (len-pos)-4) | v->RXB_mask(pos); }
+
  static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
  static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }

@ -2125,6 +2382,397 @@ class Assembler : public AbstractAssembler {
  inline void z_trtt(Register r1, Register r2, int64_t m3);


+  //---------------------------
+  //--  Vector Instructions  --
+  //---------------------------
+
+  //---<  Vector Support Instructions  >---
+
+  // Load (transfer from memory)
+  inline void z_vlm(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vl(    VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vleb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vleh(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vlef(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vleg(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+
+  // Gather/Scatter
+  inline void z_vgef(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+  inline void z_vgeg(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+  inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+  inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+  // load and replicate
+  inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+  inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+  inline void z_vlbb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vll(   VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  // Load (register to register)
+  inline void z_vlr(   VectorRegister v1, VectorRegister v2);
+
+  inline void z_vlgv(  Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
+  inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vlvg(  VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
+  inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
+  inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
+
+  // vector register pack
+  inline void z_vpk(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vpkh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vpks(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vpkls(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // vector register unpack (sign-extended)
+  inline void z_vuph(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuphb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuphh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuphf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vupl(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuplb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuplh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vuplf(  VectorRegister v1, VectorRegister v2);
+
+  // vector register unpack (zero-extended)
+  inline void z_vuplh(  VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
+  inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
+  inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
+  inline void z_vupll(  VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vupllb( VectorRegister v1, VectorRegister v2);
+  inline void z_vupllh( VectorRegister v1, VectorRegister v2);
+  inline void z_vupllf( VectorRegister v1, VectorRegister v2);
+
+  // vector register merge high/low
+  inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // vector register permute
+  inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vpdi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t        m4);
+
+  // vector register replicate
+  inline void z_vrep(  VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
+  inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
+  inline void z_vrepi( VectorRegister v1, int64_t imm2,      int64_t m3);
+  inline void z_vrepib(VectorRegister v1, int64_t imm2);
+  inline void z_vrepih(VectorRegister v1, int64_t imm2);
+  inline void z_vrepif(VectorRegister v1, int64_t imm2);
+  inline void z_vrepig(VectorRegister v1, int64_t imm2);
+
+  inline void z_vsel(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vseg(  VectorRegister v1, VectorRegister v2, int64_t imm3);
+
+  // Load (immediate)
+  inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
+  inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
+
+  // Store
+  inline void z_vstm(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vst(   VectorRegister v1, int64_t d2, Register x2, Register b2);
+  inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+  inline void z_vstl(  VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+  // Misc
+  inline void z_vgm(   VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
+  inline void z_vgmb(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmh(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmf(  VectorRegister v1, int64_t imm2, int64_t imm3);
+  inline void z_vgmg(  VectorRegister v1, int64_t imm2, int64_t imm3);
+
+  inline void z_vgbm(  VectorRegister v1, int64_t imm2);
+  inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
+  inline void z_vone(  VectorRegister v1); // preferred method to set vreg to all ones
+
+  //---<  Vector Arithmetic Instructions  >---
+
+  // Load
+  inline void z_vlc(    VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vlcb(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlch(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlcf(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlcg(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlp(    VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vlpb(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlph(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlpf(   VectorRegister v1, VectorRegister v2);
+  inline void z_vlpg(   VectorRegister v1, VectorRegister v2);
+
+  // ADD
+  inline void z_va(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vab(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vah(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaf(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vag(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaq(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vacc(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vaccb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vacch(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vaccq(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // SUB
+  inline void z_vs(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsb(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsh(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsf(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsg(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsq(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // MULTIPLY
+  inline void z_vml(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmh(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmlh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vme(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmle(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmo(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmlo(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+
+  // MULTIPLY & ADD
+  inline void z_vmal(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmah(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmalh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmale(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmao(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vmalo(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+
+  // VECTOR SUM
+  inline void z_vsum(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumg(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumq(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Average
+  inline void z_vavg(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vavgb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavgl(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // VECTOR Galois Field Multiply Sum
+  inline void z_vgfm(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vgfmb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vgfmg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  // VECTOR Galois Field Multiply Sum and Accumulate
+  inline void z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
+  inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+  inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+
+  //---<  Vector Logical Instructions  >---
+
+  // AND
+  inline void z_vn(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vnc(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // XOR
+  inline void z_vx(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // NOR
+  inline void z_vno(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // OR
+  inline void z_vo(     VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Comparison (element-wise)
+  inline void z_vceq(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vceqb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vch(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vchb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchg(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchbs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchhs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchfs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchgs(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+  inline void z_vchlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Max/Min (element-wise)
+  inline void z_vmx(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmxb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxg(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmxlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmxlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmn(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmnb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnh(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnf(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmng(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+  inline void z_vmnlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vmnlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Leading/Trailing Zeros, population count
+  inline void z_vclz(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vclzb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vclzg(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctz(   VectorRegister v1, VectorRegister v2, int64_t m3);
+  inline void z_vctzb(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzh(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzf(  VectorRegister v1, VectorRegister v2);
+  inline void z_vctzg(  VectorRegister v1, VectorRegister v2);
+  inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
+
+  // Rotate/Shift
+  inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_verll(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_verim(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
+  inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+  inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+
+  inline void z_veslv(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesl(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_veslb(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslh(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslf(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_veslg(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesra(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4);
+  inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vesrl(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4);
+  inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+  inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+  inline void z_vsl(    VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vslb(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsldb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+
+  inline void z_vsra(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrab(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3);
+  inline void z_vsrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+  // Test under Mask
+  inline void z_vtm(    VectorRegister v1, VectorRegister v2);
+
+
  // Floatingpoint instructions
  // ==========================

--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -702,6 +702,396 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
 inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }


+//---------------------------
+//--  Vector Instructions  --
+//---------------------------
+
+//---<  Vector Support Instructions  >---
+
+// Load (transfer from memory)
+inline void Assembler::z_vlm(    VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {emit_48(VLM_ZOPC   | vreg(v1,  8)     | vreg(v3, 12)     | rsmask_48(d2,     b2)); }
+inline void Assembler::z_vl(     VectorRegister v1, int64_t d2, Register x2, Register b2)             {emit_48(VL_ZOPC    | vreg(v1,  8)                        | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vleb(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vleh(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vlef(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vleg(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+// Gather/Scatter
+inline void Assembler::z_vgef(   VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC  | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vgeg(   VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC  | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+inline void Assembler::z_vscef(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vsceg(  VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1,  8)                 | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW,   32)); }
+
+// load and replicate
+inline void Assembler::z_vlrep(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
+inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_HW); }  // load HW   and replicate to all vector elements of type 'H'
+inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_FW); }  // load FW   and replicate to all vector elements of type 'F'
+inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vlrep(v1, d2, x2, b2, VRET_DW); }  // load DW   and replicate to all vector elements of type 'G'
+
+inline void Assembler::z_vllez(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_HW); }  // load logical HW   into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_FW); }  // load logical FW   into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2)             {z_vllez(v1, d2, x2, b2, VRET_DW); }  // load logical DW   into left DW of VR, zero all other bit positions.
+
+inline void Assembler::z_vlbb(   VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC  | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_vll(    VectorRegister v1, Register r3, int64_t d2, Register b2)             {emit_48(VLL_ZOPC   | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2)); }
+
+// Load (register to register)
+inline void Assembler::z_vlr (   VectorRegister v1, VectorRegister v2)                                {emit_48(VLR_ZOPC   | vreg(v1,  8)     | vreg(v2, 12)); }
+
+inline void Assembler::z_vlgv(   Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC  |  reg(r1,  8, 48) | vreg(v3, 12)     | rsmask_48(d2,     b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlgvb(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvh(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_HW); }   // load HW   from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvf(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_FW); }   // load FW   from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvg(  Register r1, VectorRegister v3, int64_t d2, Register b2)             {z_vlgv(r1, v3, d2, b2, VRET_DW); }   // load DW   from VR element (index d2(b2)) into GR.
+
+inline void Assembler::z_vlvg(   VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC  | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlvgb(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
+inline void Assembler::z_vlvgh(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_HW); }
+inline void Assembler::z_vlvgf(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_FW); }
+inline void Assembler::z_vlvgg(  VectorRegister v1, Register r3, int64_t d2, Register b2)             {z_vlvg(v1, r3, d2, b2, VRET_DW); }
+
+inline void Assembler::z_vlvgp(  VectorRegister v1, Register r2, Register r3)                         {emit_48(VLVGP_ZOPC | vreg(v1,  8)     |  reg(r2, 12, 48) |  reg(r3, 16, 48)); }
+
+// vector register pack
+inline void Assembler::z_vpk(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC   | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
+inline void Assembler::z_vpkh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vpkf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vpkg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpk(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+
+inline void Assembler::z_vpks(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | vccc_mask(cc5, 24)); }
+inline void Assembler::z_vpksh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_HW, VOP_CCIGN); }   // vector element type 'H', don't set CC
+inline void Assembler::z_vpksf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_FW, VOP_CCIGN); }   // vector element type 'F', don't set CC
+inline void Assembler::z_vpksg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_DW, VOP_CCIGN); }   // vector element type 'G', don't set CC
+inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_HW, VOP_CCSET); }   // vector element type 'H', set CC
+inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_FW, VOP_CCSET); }   // vector element type 'F', set CC
+inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpks(v1, v2, v3, VRET_DW, VOP_CCSET); }   // vector element type 'G', set CC
+
+inline void Assembler::z_vpkls(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | vccc_mask(cc5, 24)); }
+inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_HW, VOP_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_FW, VOP_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_DW, VOP_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_HW, VOP_CCSET); }  // vector element type 'H', set CC
+inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_FW, VOP_CCSET); }  // vector element type 'F', set CC
+inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vpkls(v1, v2, v3, VRET_DW, VOP_CCSET); }  // vector element type 'G', set CC
+
+// vector register unpack (sign-extended)
+inline void Assembler::z_vuph(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPH_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuphb(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vuphh(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vuphf(  VectorRegister v1, VectorRegister v2)                                {z_vuph(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vupl(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPL_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuplb(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vuplh(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vuplf(  VectorRegister v1, VectorRegister v2)                                {z_vupl(v1, v2, VRET_FW); }          // vector element type 'F'
+
+// vector register unpack (zero-extended)
+inline void Assembler::z_vuplh(  VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPLH_ZOPC | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_BYTE); }       // vector element type 'B'
+inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_HW); }         // vector element type 'H'
+inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2)                                {z_vuplh(v1, v2, VRET_FW); }         // vector element type 'F'
+inline void Assembler::z_vupll(  VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VUPLL_ZOPC | vreg(v1,  8)     | vreg(v2, 12)     | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_BYTE); }       // vector element type 'B'
+inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_HW); }         // vector element type 'H'
+inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2)                                {z_vupll(v1, v2, VRET_FW); }         // vector element type 'F'
+
+// vector register merge high/low
+inline void Assembler::z_vmrh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrhb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmrhh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmrhf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmrhg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+inline void Assembler::z_vmrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | vreg(v3, 16)     | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmrlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmrlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmrlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmrh(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+// vector register permute
+inline void Assembler::z_vperm(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
+inline void Assembler::z_vpdi(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t        m4) {emit_48(VPDI_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
+
+// vector register replicate
+inline void Assembler::z_vrep(   VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4)      {emit_48(VREP_ZOPC  | vreg(v1,  8)     | vreg(v3, 12)     | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepb(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vreph(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vrepf(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vrepg(  VectorRegister v1, VectorRegister v3, int64_t imm2)                  {z_vrep(v1, v3, imm2, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vrepi(  VectorRegister v1, int64_t imm2,      int64_t m3)                    {emit_48(VREPI_ZOPC | vreg(v1,  8)                        | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_HW); }       // vector element type 'B'
+inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_FW); }       // vector element type 'B'
+inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2)                                     {z_vrepi(v1, imm2, VRET_DW); }       // vector element type 'B'
+
+inline void Assembler::z_vsel(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC  | vreg(v1,  8) |  vreg(v2, 12) |  vreg(v3, 16) |  vreg(v4, 32)); }
+inline void Assembler::z_vseg(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VSEG_ZOPC  | vreg(v1,  8)     | vreg(v2, 12)     | uimm4(m3, 32, 48)); }
+
+// Load (immediate)
+inline void Assembler::z_vleib(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIB_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vleih(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIH_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vleif(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIF_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vleig(  VectorRegister v1, int64_t imm2, int64_t m3)                         {emit_48(VLEIG_ZOPC | vreg(v1,  8)                        | simm16(imm2, 32, 48)  | veix_mask(m3, VRET_DW,   32)); }
+
+// Store
+inline void Assembler::z_vstm(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {emit_48(VSTM_ZOPC  | vreg(v1,  8)     | vreg(v3, 12)     | rsmask_48(d2,     b2)); }
+inline void Assembler::z_vst(    VectorRegister v1, int64_t d2, Register x2, Register b2)             {emit_48(VST_ZOPC   | vreg(v1,  8)                        | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vsteb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vsteh(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW,   32)); }
+inline void Assembler::z_vstef(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW,   32)); }
+inline void Assembler::z_vsteg(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1,  8)                        | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW,   32)); }
+inline void Assembler::z_vstl(   VectorRegister v1, Register r3, int64_t d2, Register b2)             {emit_48(VSTL_ZOPC  | vreg(v1,  8)     |  reg(r3, 12, 48) | rsmask_48(d2,     b2)); }
+
+// Misc
+inline void Assembler::z_vgm(    VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4)           {emit_48(VGM_ZOPC   | vreg(v1,  8)     | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgmb(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgmh(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_HW); }   // vector element type 'H'
+inline void Assembler::z_vgmf(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_FW); }   // vector element type 'F'
+inline void Assembler::z_vgmg(   VectorRegister v1, int64_t imm2, int64_t imm3)                       {z_vgm(v1, imm2, imm3, VRET_DW); }   // vector element type 'G'
+
+inline void Assembler::z_vgbm(   VectorRegister v1, int64_t imm2)                                     {emit_48(VGBM_ZOPC  | vreg(v1,  8)     | uimm16(imm2, 16, 48)); }
+inline void Assembler::z_vzero(  VectorRegister v1)                                                   {z_vgbm(v1, 0); }      // preferred method to set vreg to all zeroes
+inline void Assembler::z_vone(   VectorRegister v1)                                                   {z_vgbm(v1, 0xffff); } // preferred method to set vreg to all ones
+
+//---<  Vector Arithmetic Instructions  >---
+
+// Load
+inline void Assembler::z_vlc(    VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VLC_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlcb(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_BYTE); }         // vector element type 'B'
+inline void Assembler::z_vlch(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_HW); }           // vector element type 'H'
+inline void Assembler::z_vlcf(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_FW); }           // vector element type 'F'
+inline void Assembler::z_vlcg(   VectorRegister v1, VectorRegister v2)                                {z_vlc(v1, v2, VRET_DW); }           // vector element type 'G'
+inline void Assembler::z_vlp(    VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VLP_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlpb(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_BYTE); }         // vector element type 'B'
+inline void Assembler::z_vlph(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_HW); }           // vector element type 'H'
+inline void Assembler::z_vlpf(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_FW); }           // vector element type 'F'
+inline void Assembler::z_vlpg(   VectorRegister v1, VectorRegister v2)                                {z_vlp(v1, v2, VRET_DW); }           // vector element type 'G'
+
+// ADD
+inline void Assembler::z_va(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VA_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vab(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_BYTE); }      // vector element type 'B'
+inline void Assembler::z_vah(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_HW); }        // vector element type 'H'
+inline void Assembler::z_vaf(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_FW); }        // vector element type 'F'
+inline void Assembler::z_vag(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_DW); }        // vector element type 'G'
+inline void Assembler::z_vaq(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_va(v1, v2, v3, VRET_QW); }        // vector element type 'Q'
+inline void Assembler::z_vacc(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VACC_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vaccb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vacch(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vaccf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vaccg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vaccq(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vacc(v1, v2, v3, VRET_QW); }      // vector element type 'Q'
+
+// SUB
+inline void Assembler::z_vs(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vsb(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_BYTE); }      // vector element type 'B'
+inline void Assembler::z_vsh(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_HW); }        // vector element type 'H'
+inline void Assembler::z_vsf(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_FW); }        // vector element type 'F'
+inline void Assembler::z_vsg(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_DW); }        // vector element type 'G'
+inline void Assembler::z_vsq(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vs(v1, v2, v3, VRET_QW); }        // vector element type 'Q'
+inline void Assembler::z_vscbi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSCBI_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vscbi(v1, v2, v3, VRET_QW); }     // vector element type 'Q'
+
+// MULTIPLY
+inline void Assembler::z_vml(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmh(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vme(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmle(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmo(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlo(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+
+// MULTIPLY & ADD
+inline void Assembler::z_vmal(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmah(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmale(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmao(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalo(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+
+// VECTOR SUM
+inline void Assembler::z_vsum(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
+inline void Assembler::z_vsumb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsum(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vsumh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsum(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vsumg(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW,   VRET_FW, 32)); }
+inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumg(v1, v2, v3, VRET_HW); }     // vector element type 'B'
+inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumg(v1, v2, v3, VRET_FW); }     // vector element type 'H'
+inline void Assembler::z_vsumq(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW,   VRET_DW, 32)); }
+inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumq(v1, v2, v3, VRET_FW); }     // vector element type 'B'
+inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vsumq(v1, v2, v3, VRET_DW); }     // vector element type 'H'
+
+// Average
+inline void Assembler::z_vavg(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVG_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavgb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vavgh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vavgf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vavgg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavg(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vavgl(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVGL_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vavgl(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+
+// VECTOR Galois Field Multiply Sum
+inline void Assembler::z_vgfm(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VGFM_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgfmb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vgfmh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vgfmf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vgfmg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vgfm(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VGFMA_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v3, 16) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20)); }
+inline void Assembler::z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_HW); }   // vector element type 'H'
+inline void Assembler::z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_FW); }   // vector element type 'F'
+inline void Assembler::z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_DW); }   // vector element type 'G'
+
+//---<  Vector Logical Instructions  >---
+
+// AND
+inline void Assembler::z_vn(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VN_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vnc(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VNC_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// XOR
+inline void Assembler::z_vx(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VX_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// NOR
+inline void Assembler::z_vno(    VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VNO_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// OR
+inline void Assembler::z_vo(     VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VO_ZOPC    | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Comparison (element-wise)
+inline void Assembler::z_vceq(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | vccc_mask(cc5, 24)); }
+inline void Assembler::z_vceqb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_BYTE, VOP_CCIGN); } // vector element type 'B', don't set CC
+inline void Assembler::z_vceqh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_HW,   VOP_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vceqf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_FW,   VOP_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vceqg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_DW,   VOP_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_BYTE, VOP_CCSET); } // vector element type 'B', don't set CC
+inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_HW,   VOP_CCSET); } // vector element type 'H', don't set CC
+inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_FW,   VOP_CCSET); } // vector element type 'F', don't set CC
+inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vceq(v1, v2, v3, VRET_DW,   VOP_CCSET); } // vector element type 'G', don't set CC
+inline void Assembler::z_vch(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | vccc_mask(cc5, 24)); }
+inline void Assembler::z_vchb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_BYTE,  VOP_CCIGN); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_HW,    VOP_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_FW,    VOP_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_DW,    VOP_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vchbs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_BYTE,  VOP_CCSET); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchhs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_HW,    VOP_CCSET); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchfs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_FW,    VOP_CCSET); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchgs(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vch(v1, v2, v3, VRET_DW,    VOP_CCSET); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vchl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | vccc_mask(cc5, 24)); }
+inline void Assembler::z_vchlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_BYTE, VOP_CCIGN); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_HW,   VOP_CCIGN); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_FW,   VOP_CCIGN); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_DW,   VOP_CCIGN); }  // vector element type 'G', don't set CC
+inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_BYTE, VOP_CCSET); }  // vector element type 'B', don't set CC
+inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_HW,   VOP_CCSET); }  // vector element type 'H', don't set CC
+inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_FW,   VOP_CCSET); }  // vector element type 'F', don't set CC
+inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vchl(v1, v2, v3, VRET_DW,   VOP_CCSET); }  // vector element type 'G', don't set CC
+
+// Max/Min (element-wise)
+inline void Assembler::z_vmx(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vmxh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vmxf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vmxg(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmx(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+inline void Assembler::z_vmxl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmxlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmxlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmxlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmxl(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+inline void Assembler::z_vmn(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_BYTE); }     // vector element type 'B'
+inline void Assembler::z_vmnh(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_HW); }       // vector element type 'H'
+inline void Assembler::z_vmnf(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_FW); }       // vector element type 'F'
+inline void Assembler::z_vmng(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmn(v1, v2, v3, VRET_DW); }       // vector element type 'G'
+inline void Assembler::z_vmnl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_BYTE); }    // vector element type 'B'
+inline void Assembler::z_vmnlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_HW); }      // vector element type 'H'
+inline void Assembler::z_vmnlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_FW); }      // vector element type 'F'
+inline void Assembler::z_vmnlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vmnl(v1, v2, v3, VRET_DW); }      // vector element type 'G'
+
+// Leading/Trailing Zeros, population count
+inline void Assembler::z_vclz(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VCLZ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vclzb(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vclzh(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vclzf(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vclzg(  VectorRegister v1, VectorRegister v2)                                {z_vclz(v1, v2, VRET_DW); }          // vector element type 'G'
+inline void Assembler::z_vctz(   VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VCTZ_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vctzb(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_BYTE); }        // vector element type 'B'
+inline void Assembler::z_vctzh(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_HW); }          // vector element type 'H'
+inline void Assembler::z_vctzf(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_FW); }          // vector element type 'F'
+inline void Assembler::z_vctzg(  VectorRegister v1, VectorRegister v2)                                {z_vctz(v1, v2, VRET_DW); }          // vector element type 'G'
+inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3)                    {emit_48(VPOPCT_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+
+// Rotate/Shift
+inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_verllv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_verll(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_verll(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+inline void Assembler::z_verim(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERLL_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); }     // vector element type 'G'
+
+inline void Assembler::z_veslv(  VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
+inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_HW); }     // vector element type 'H'
+inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_FW); }     // vector element type 'F'
+inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_veslv(v1, v2, v3, VRET_DW); }     // vector element type 'G'
+inline void Assembler::z_vesl(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESL_ZOPC  | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslb(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
+inline void Assembler::z_veslh(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_HW);}   // vector element type 'H'
+inline void Assembler::z_veslf(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_FW);}   // vector element type 'F'
+inline void Assembler::z_veslg(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesl(v1, v3, d2, b2, VRET_DW);}   // vector element type 'G'
+
+inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESRAV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrav(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vesra(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesra(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3,               int64_t m4) {emit_48(VESRLV_ZOPC| vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)      | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
+inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
+inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
+inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3)             {z_vesrlv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
+inline void Assembler::z_vesrl(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2,         int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1,  8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_HW);}  // vector element type 'H'
+inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_FW);}  // vector element type 'F'
+inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2)       {z_vesrl(v1, v3, d2, b2, VRET_DW);}  // vector element type 'G'
+
+inline void Assembler::z_vsl(    VectorRegister v1, VectorRegister v2, VectorRegister v3)               {emit_48(VSL_ZOPC   | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vslb(   VectorRegister v1, VectorRegister v2, VectorRegister v3)               {emit_48(VSLB_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsldb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
+
+inline void Assembler::z_vsra(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRA_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrab(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRAB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRL_ZOPC  | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3)             {emit_48(VSRLB_ZOPC | vreg(v1,  8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Test under Mask
+inline void Assembler::z_vtm(    VectorRegister v1, VectorRegister v2)                                {emit_48(VTM_ZOPC   | vreg(v1,  8) | vreg(v2, 12)); }
+
+
 //-------------------------------
 // FLOAT INSTRUCTIONS
 //-------------------------------
--- a/src/hotspot/cpu/s390/register_definitions_s390.cpp
+++ b/src/hotspot/cpu/s390/register_definitions_s390.cpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -35,3 +35,5 @@
 REGISTER_DEFINITION(Register, noreg);

 REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+REGISTER_DEFINITION(VectorRegister, vnoreg);
--- a/src/hotspot/cpu/s390/register_s390.cpp
+++ b/src/hotspot/cpu/s390/register_s390.cpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
  };
  return is_valid() ? names[encoding()] : "fnoreg";
 }
+
+const char* VectorRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "Z_V0",  "Z_V1",  "Z_V2",  "Z_V3",  "Z_V4",  "Z_V5",  "Z_V6",  "Z_V7",
+    "Z_V8",  "Z_V9",  "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
+    "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
+    "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
+  };
+  return is_valid() ? names[encoding()] : "fnoreg";
+}
--- a/src/hotspot/cpu/s390/register_s390.hpp
+++ b/src/hotspot/cpu/s390/register_s390.hpp
@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,11 +34,6 @@ class VMRegImpl;

 typedef VMRegImpl* VMReg;

-// Use Register as shortcut.
-class RegisterImpl;
-typedef RegisterImpl* Register;
-
-// The implementation of integer registers for z/Architecture.

 // z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
 //
@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
 //   f1,f3,f5,f7 General purpose (volatile)
 //   f8-f15      General purpose (nonvolatile)

+
+//===========================
+//===  Integer Registers  ===
+//===========================
+
+// Use Register as shortcut.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+// The implementation of integer registers for z/Architecture.
+
 inline Register as_Register(int encoding) {
  return (Register)(long)encoding;
 }
@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
 CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
 CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));

+
+//=============================
+//===  Condition Registers  ===
+//=============================
+
 // Use ConditionRegister as shortcut
 class ConditionRegisterImpl;
 typedef ConditionRegisterImpl* ConditionRegister;
@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
 // dangers of defines.
 // If a particular file has a problem with these defines then it's possible
 // to turn them off in that file by defining
-// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
+// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
 // so that it's able to provide real definitions of these registers
 // for use in debuggers and such.

@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
 #define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
 #endif // DONT_USE_REGISTER_DEFINES

+
+//=========================
+//===  Float Registers  ===
+//=========================
+
 // Use FloatRegister as shortcut
 class FloatRegisterImpl;
 typedef FloatRegisterImpl* FloatRegister;
@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
 #define Z_F15 ((FloatRegister)(  Z_F15_FloatRegisterEnumValue))
 #endif // DONT_USE_REGISTER_DEFINES

-// Need to know the total number of registers of all sorts for SharedInfo.
-// Define a class that exports it.
-
-class ConcreteRegisterImpl : public AbstractRegisterImpl {
- public:
-  enum {
-    number_of_registers =
-      (RegisterImpl::number_of_registers +
-      FloatRegisterImpl::number_of_registers)
-      * 2 // register halves
-      + 1 // condition code register
-  };
-  static const int max_gpr;
-  static const int max_fpr;
-};
-
 // Single, Double and Quad fp reg classes. These exist to map the ADLC
 // encoding for a floating point register, to the FloatRegister number
 // desired by the macroassembler. A FloatRegister is a number between
@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
 };


+//==========================
+//===  Vector Registers  ===
+//==========================
+
+// Use VectorRegister as shortcut
+class VectorRegisterImpl;
+typedef VectorRegisterImpl* VectorRegister;
+
+// The implementation of vector registers for z/Architecture.
+
+inline VectorRegister as_VectorRegister(int encoding) {
+  return (VectorRegister)(long)encoding;
+}
+
+class VectorRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 32,
+    number_of_arg_registers = 0
+  };
+
+  // construction
+  inline friend VectorRegister as_VectorRegister(int encoding);
+
+  inline VMReg as_VMReg();
+
+  // accessors
+  int encoding() const                                {
+     assert(is_valid(), "invalid register"); return value();
+  }
+
+  bool is_valid() const           { return  0 <= value() && value() < number_of_registers; }
+  bool is_volatile() const        { return true; }
+  bool is_nonvolatile() const     { return false; }
+
+  // Register fields in z/Architecture instructions are 4 bits wide, restricting the
+  // addressable register set size to 16.
+  // The vector register set size is 32, requiring an extension, by one bit, of the
+  // register encoding. This is accomplished by the introduction of a RXB field in the
+  // instruction. RXB = Register eXtension Bits.
+  // The RXB field contains the MSBs (most significant bit) of the vector register numbers
+  // used for this instruction. Assignment of MSB in RBX is by bit position of the
+  // register field in the instruction.
+  // Example:
+  //   The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
+  int64_t RXB_mask(int pos) {
+    if (encoding() >= number_of_registers/2) {
+      switch (pos) {
+        case 8:   return ((int64_t)0b1000) << 8; // actual bit pos: 36
+        case 12:  return ((int64_t)0b0100) << 8; // actual bit pos: 37
+        case 16:  return ((int64_t)0b0010) << 8; // actual bit pos: 38
+        case 32:  return ((int64_t)0b0001) << 8; // actual bit pos: 39
+        default:
+          ShouldNotReachHere();
+      }
+    }
+    return 0;
+  }
+
+  const char* name() const;
+
+  VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
+};
+
+// The Vector registers of z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V0,  (0));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V1,  (1));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V2,  (2));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V3,  (3));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V4,  (4));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V5,  (5));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V6,  (6));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V7,  (7));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V8,  (8));
+CONSTANT_REGISTER_DECLARATION(VectorRegister,  Z_V9,  (9));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
+#define Z_V0  ((VectorRegister)(   Z_V0_VectorRegisterEnumValue))
+#define Z_V1  ((VectorRegister)(   Z_V1_VectorRegisterEnumValue))
+#define Z_V2  ((VectorRegister)(   Z_V2_VectorRegisterEnumValue))
+#define Z_V3  ((VectorRegister)(   Z_V3_VectorRegisterEnumValue))
+#define Z_V4  ((VectorRegister)(   Z_V4_VectorRegisterEnumValue))
+#define Z_V5  ((VectorRegister)(   Z_V5_VectorRegisterEnumValue))
+#define Z_V6  ((VectorRegister)(   Z_V6_VectorRegisterEnumValue))
+#define Z_V7  ((VectorRegister)(   Z_V7_VectorRegisterEnumValue))
+#define Z_V8  ((VectorRegister)(   Z_V8_VectorRegisterEnumValue))
+#define Z_V9  ((VectorRegister)(   Z_V9_VectorRegisterEnumValue))
+#define Z_V10 ((VectorRegister)(  Z_V10_VectorRegisterEnumValue))
+#define Z_V11 ((VectorRegister)(  Z_V11_VectorRegisterEnumValue))
+#define Z_V12 ((VectorRegister)(  Z_V12_VectorRegisterEnumValue))
+#define Z_V13 ((VectorRegister)(  Z_V13_VectorRegisterEnumValue))
+#define Z_V14 ((VectorRegister)(  Z_V14_VectorRegisterEnumValue))
+#define Z_V15 ((VectorRegister)(  Z_V15_VectorRegisterEnumValue))
+#define Z_V16 ((VectorRegister)(  Z_V16_VectorRegisterEnumValue))
+#define Z_V17 ((VectorRegister)(  Z_V17_VectorRegisterEnumValue))
+#define Z_V18 ((VectorRegister)(  Z_V18_VectorRegisterEnumValue))
+#define Z_V19 ((VectorRegister)(  Z_V19_VectorRegisterEnumValue))
+#define Z_V20 ((VectorRegister)(  Z_V20_VectorRegisterEnumValue))
+#define Z_V21 ((VectorRegister)(  Z_V21_VectorRegisterEnumValue))
+#define Z_V22 ((VectorRegister)(  Z_V22_VectorRegisterEnumValue))
+#define Z_V23 ((VectorRegister)(  Z_V23_VectorRegisterEnumValue))
+#define Z_V24 ((VectorRegister)(  Z_V24_VectorRegisterEnumValue))
+#define Z_V25 ((VectorRegister)(  Z_V25_VectorRegisterEnumValue))
+#define Z_V26 ((VectorRegister)(  Z_V26_VectorRegisterEnumValue))
+#define Z_V27 ((VectorRegister)(  Z_V27_VectorRegisterEnumValue))
+#define Z_V28 ((VectorRegister)(  Z_V28_VectorRegisterEnumValue))
+#define Z_V29 ((VectorRegister)(  Z_V29_VectorRegisterEnumValue))
+#define Z_V30 ((VectorRegister)(  Z_V30_VectorRegisterEnumValue))
+#define Z_V31 ((VectorRegister)(  Z_V31_VectorRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers =
+      (RegisterImpl::number_of_registers +
+      FloatRegisterImpl::number_of_registers)
+      * 2 // register halves
+      + 1 // condition code register
+  };
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+
 // Common register declarations used in assembler code.
 REGISTER_DECLARATION(Register,      Z_EXC_OOP, Z_R2);
 REGISTER_DECLARATION(Register,      Z_EXC_PC,  Z_R3);
--- a/src/hotspot/cpu/sparc/assembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/assembler_sparc.hpp
@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
    fpop1_op3    = 0x34,
    fpop2_op3    = 0x35,
    impdep1_op3  = 0x36,
+    addx_op3     = 0x36,
    aes3_op3     = 0x36,
    sha_op3      = 0x36,
    bmask_op3    = 0x36,
@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
    fzero_op3    = 0x36,
    fsrc_op3     = 0x36,
    fnot_op3     = 0x36,
+    mpmul_op3    = 0x36,
+    umulx_op3    = 0x36,
    xmulx_op3    = 0x36,
    crc32c_op3   = 0x36,
    impdep2_op3  = 0x37,
@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
    fnegs_opf          = 0x05,
    fnegd_opf          = 0x06,

+    addxc_opf          = 0x11,
+    addxccc_opf        = 0x13,
+    umulxhi_opf        = 0x16,
    alignaddr_opf      = 0x18,
    bmask_opf          = 0x19,

@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
    sha256_opf         = 0x142,
    sha512_opf         = 0x143,

-    crc32c_opf         = 0x147
+    crc32c_opf         = 0x147,
+    mpmul_opf          = 0x148
  };

  enum op5s {
@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
    assert_signed_range(x, nbits + 2);
  }

-  static void assert_unsigned_const(int x, int nbits) {
+  static void assert_unsigned_range(int x, int nbits) {
    assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
  }

@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
    return x & ((1 << nbits) - 1);
  }

+  // unsigned immediate, in low bits, at most nbits long.
+  static int uimm(int x, int nbits) {
+    assert_unsigned_range(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
  // compute inverse of wdisp16
  static intptr_t inv_wdisp16(int x, intptr_t pos) {
    int lo = x & ((1 << 14) - 1);
@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
  // FMAf instructions supported only on certain processors
  static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }

+  // MPMUL instruction supported only on certain processors
+  static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
+
  // instruction only in VIS1
  static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }

@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
    AbstractAssembler::flush();
  }

-  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
-  inline void emit_data(int);
-  inline void emit_data(int, RelocationHolder const &rspec);
-  inline void emit_data(int, relocInfo::relocType rtype);
-  // helper for above functions
+  inline void emit_int32(int32_t);  // shadows AbstractAssembler::emit_int32
+  inline void emit_data(int32_t);
+  inline void emit_data(int32_t, RelocationHolder const&);
+  inline void emit_data(int32_t, relocInfo::relocType rtype);
+
+  // Helper for the above functions.
  inline void check_delay();


@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
  // fmaf instructions.

  inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+  inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+
+  inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+  inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);

  // pp 165

@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
  inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
                  RelocationHolder const &rspec = RelocationHolder());

+  inline void ldd(Register s1, Register s2, FloatRegister d);
+  inline void ldd(Register s1, int simm13a, FloatRegister d);

  inline void ldfsr(Register s1, Register s2);
  inline void ldfsr(Register s1, int simm13a);
@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
  inline void lduw(Register s1, int simm13a, Register d);
  inline void ldx(Register s1, Register s2, Register d);
  inline void ldx(Register s1, int simm13a, Register d);
-  inline void ldd(Register s1, Register s2, Register d);
-  inline void ldd(Register s1, int simm13a, Register d);

  // pp 177

@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);

+  inline void std(FloatRegister d, Register s1, Register s2);
+  inline void std(FloatRegister d, Register s1, int simm13a);
+
  inline void stfsr(Register s1, Register s2);
  inline void stfsr(Register s1, int simm13a);
  inline void stxfsr(Register s1, Register s2);
@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
  inline void stw(Register d, Register s1, int simm13a);
  inline void stx(Register d, Register s1, Register s2);
  inline void stx(Register d, Register s1, int simm13a);
-  inline void std(Register d, Register s1, Register s2);
-  inline void std(Register d, Register s1, int simm13a);

  // pp 177

@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {

  // VIS3 instructions

+  inline void addxc(Register s1, Register s2, Register d);
+  inline void addxccc(Register s1, Register s2, Register d);
+
  inline void movstosw(FloatRegister s, Register d);
  inline void movstouw(FloatRegister s, Register d);
  inline void movdtox(FloatRegister s, Register d);
@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {

  inline void xmulx(Register s1, Register s2, Register d);
  inline void xmulxhi(Register s1, Register s2, Register d);
+  inline void umulxhi(Register s1, Register s2, Register d);

  // Crypto SHA instructions

@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {

  inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);

+  // MPMUL instruction
+
+  inline void mpmul(int uimm5);
+
  // Creation
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef VALIDATE_PIPELINE
--- a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp
+++ b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp
@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
 #endif
 }

-inline void Assembler::emit_int32(int x) {
+inline void Assembler::emit_int32(int32_t x) {
  check_delay();
 #ifdef VALIDATE_PIPELINE
  _hazard_state = NoHazard;
@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
  AbstractAssembler::emit_int32(x);
 }

-inline void Assembler::emit_data(int x) {
+inline void Assembler::emit_data(int32_t x) {
  emit_int32(x);
 }

-inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
+inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
  relocate(rtype);
  emit_int32(x);
 }

-inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
+inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
  relocate(rspec);
  emit_int32(x);
 }
@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
  fmaf_only();
  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
 }
+inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w));
+}
+
+inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w));
+}
+inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+  fmaf_only();
+  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w));
+}

 inline void Assembler::flush(Register s1, Register s2) {
  emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
  emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
 }

+inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
+  assert(d->is_even(), "not even");
+  ldf(FloatRegisterImpl::D, s1, s2, d);
+}
+inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
+  assert(d->is_even(), "not even");
+  ldf(FloatRegisterImpl::D, s1, simm13a, d);
+}
+
 inline void Assembler::ldxfsr(Register s1, Register s2) {
  emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
 }
@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
 inline void Assembler::ldx(Register s1, int simm13a, Register d) {
  emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }
-inline void Assembler::ldd(Register s1, Register s2, Register d) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::ldd(Register s1, int simm13a, Register d) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}

 inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
  emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
  emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }

+inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, s2);
+}
+inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
+  assert(d->is_even(), "not even");
+  stf(FloatRegisterImpl::D, d, s1, simm13a);
+}
+
 inline void Assembler::stxfsr(Register s1, Register s2) {
  emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
 }
@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
 inline void Assembler::stx(Register d, Register s1, int simm13a) {
  emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
 }
-inline void Assembler::std(Register d, Register s1, Register s2) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::std(Register d, Register s1, int simm13a) {
-  v9_dep();
-  assert(d->is_even(), "not even");
-  emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}

 inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
  emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste

 // VIS3 instructions

+inline void Assembler::addxc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
+}
+inline void Assembler::addxccc(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
+}
+
 inline void Assembler::movstosw(FloatRegister s, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
 }
+inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
+  vis3_only();
+  emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
+}

 // Crypto SHA instructions

@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
 }

+// MPMUL instruction
+
+inline void Assembler::mpmul(int uimm5) {
+  mpmul_only();
+  emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5));
+}
+
 #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
--- a/src/hotspot/cpu/sparc/frame_sparc.cpp
+++ b/src/hotspot/cpu/sparc/frame_sparc.cpp
@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
    reg = regname->as_Register();
  }
  if (reg->is_out()) {
-    assert(_younger_window != NULL, "Younger window should be available");
-    return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
+    return _younger_window == NULL ? NULL :
+      second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
  }
  if (reg->is_local() || reg->is_in()) {
    assert(_window != NULL, "Window should be available");
--- a/src/hotspot/cpu/sparc/globals_sparc.hpp
+++ b/src/hotspot/cpu/sparc/globals_sparc.hpp
@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
                   writeable) \
                                                                            \
  product(intx, UseVIS, 99,                                                 \
-          "Highest supported VIS instructions set on Sparc")                \
+          "Highest supported VIS instructions set on SPARC")                \
          range(0, 99)                                                      \
                                                                            \
  product(bool, UseCBCond, false,                                           \
          "Use compare and branch instruction on SPARC")                    \
                                                                            \
+  product(bool, UseMPMUL, false,                                            \
+          "Use multi-precision multiply instruction (mpmul) on SPARC")      \
+                                                                            \
  product(bool, UseBlockZeroing, false,                                     \
          "Use special cpu instructions for block zeroing")                 \
                                                                            \
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
@ -1574,29 +1574,39 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
-    return;
+  } else {
+    br_null(s1, false, p, L);
+    delayed()->nop();
  }
-  br_null(s1, false, p, L);
-  delayed()->nop();
 }

 void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
-    return;
+  } else {
+    br_notnull(s1, false, p, L);
+    delayed()->nop();
  }
-  br_notnull(s1, false, p, L);
-  delayed()->nop();
 }

 // Unconditional short branch
 void MacroAssembler::ba_short(Label& L) {
+  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(equal, icc, G0, G0, L);
-    return;
+  } else {
+    br(always, false, pt, L);
+    delayed()->nop();
  }
-  br(always, false, pt, L);
+}
+
+// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
+
+void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
+  assert_not_delayed();
+  Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
+  br(cf, false, p, L);
  delayed()->nop();
 }

--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
  // offset.  No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
  //
-  // %%%%%% Currently not done for SPARC
+  // FIXME: Currently not done for SPARC

  void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
  // unconditional short branch
  void ba_short(Label& L);

+  // Branch on icc.z (true or not).
+  void br_icc_zero(bool iszero, Predict p, Label &L);
+
  inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
  inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );

@ -663,19 +666,19 @@ class MacroAssembler : public Assembler {
  inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );

  // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
-  inline void cmp(  Register s1, Register s2 );
-  inline void cmp(  Register s1, int simm13a );
+  inline void cmp( Register s1, Register s2 );
+  inline void cmp( Register s1, int simm13a );

  inline void jmp( Register s1, Register s2 );
  inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );

  // Check if the call target is out of wdisp30 range (relative to the code cache)
  static inline bool is_far_target(address d);
-  inline void call( address d,  relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void call( address d,  RelocationHolder const& rspec);
+  inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void call( address d, RelocationHolder const& rspec);

-  inline void call( Label& L,   relocInfo::relocType rt = relocInfo::runtime_call_type );
-  inline void call( Label& L,  RelocationHolder const& rspec);
+  inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
+  inline void call( Label& L, RelocationHolder const& rspec);

  inline void callr( Register s1, Register s2 );
  inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp
@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
 }

 inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
-  // See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
+  // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
  avoid_pipeline_stall();
  br(c, a, p, target(L));
 }
--- a/src/hotspot/cpu/sparc/register_sparc.hpp
+++ b/src/hotspot/cpu/sparc/register_sparc.hpp
@ -236,7 +236,7 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
  inline VMReg as_VMReg( );

  // accessors
-  int encoding() const                                { assert(is_valid(), "invalid register"); return value(); }
+  int encoding() const { assert(is_valid(), "invalid register"); return value(); }

 public:
  int encoding(Width w) const {
@ -258,10 +258,12 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
    return -1;
  }

-  bool  is_valid() const                              { return 0 <= value() && value() < number_of_registers; }
+  bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+  bool is_even()  const { return (encoding() & 1) == 0; }
+
  const char* name() const;

-  FloatRegister successor() const                     { return as_FloatRegister(encoding() + 1); }
+  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
 };


--- a/src/hotspot/cpu/sparc/sparc.ad
+++ b/src/hotspot/cpu/sparc/sparc.ad
@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
 %}


-
 enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
    MacroAssembler _masm(&cbuf);

@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
    __ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
 %}

+enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);

+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+    __ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
+%}
+
+enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+    MacroAssembler _masm(&cbuf);
+
+    FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+    FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+    FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+    FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+    __ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
+%}


 enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
  ins_pipe(fdivD_reg_reg);
 %}

-// Single precision fused floating-point multiply-add (d = a * b + c).
+// Single/Double precision fused floating-point multiply-add (d = a * b + c).
 instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set dst (FmaF c (Binary a b)));
@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
  ins_pipe(fmaF_regx4);
 %}

-// Double precision fused floating-point multiply-add (d = a * b + c).
 instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set dst (FmaD c (Binary a b)));
@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
  ins_pipe(fmaD_regx4);
 %}

+// Additional patterns matching complement versions that we can map directly to
+// variants of the fused multiply-add instructions.
+
+// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
+instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF c) (Binary a b)));
+  format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+  ins_encode(fmsubs(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD c) (Binary a b)));
+  format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+  ins_encode(fmsubd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-add,
+//      d = -1 * a * b - c = -(a * b + c)
+instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
+  match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
+  format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+  ins_encode(fnmadds(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
+  match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
+  format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+  ins_encode(fnmaddd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-sub,
+//      d = -1 * a * b + c = -(a * b - c)
+instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF c (Binary (NegF a) b)));
+  match(Set dst (FmaF c (Binary a (NegF b))));
+  format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+  ins_encode(fnmsubs(dst, a, b, c));
+  ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD c (Binary (NegD a) b)));
+  match(Set dst (FmaD c (Binary a (NegD b))));
+  format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+  ins_encode(fnmsubd(dst, a, b, c));
+  ins_pipe(fmaD_regx4);
+%}
+
 //----------Logical Instructions-----------------------------------------------
 // And Instructions
 // Register And
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
@ -58,7 +58,6 @@
 // Note:  The register L7 is used as L7_thread_cache, and may not be used
 //        any other way within this module.

-
 static const Register& Lstub_temp = L2;

 // -------------------------------------------------------------------------------------------------------------------------
@ -4943,7 +4942,7 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

-/**
+  /**
   *  Arguments:
   *
   * Inputs:
@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

+  /**
+   * Arguments:
+   *
+   * Inputs:
+   *   I0   - int* x-addr
+   *   I1   - int  x-len
+   *   I2   - int* y-addr
+   *   I3   - int  y-len
+   *   I4   - int* z-addr   (output vector)
+   *   I5   - int  z-len
+   */
+  address generate_multiplyToLen() {
+    assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+    address start = __ pc();
+
+    __ save_frame(0);
+
+    const Register xptr = I0; // input address
+    const Register xlen = I1; // ...and length in 32b-words
+    const Register yptr = I2; //
+    const Register ylen = I3; //
+    const Register zptr = I4; // output address
+    const Register zlen = I5; // ...and length in 32b-words
+
+    /* The minimal "limb" representation suggest that odd length vectors are as
+     * likely as even length dittos. This in turn suggests that we need to cope
+     * with odd/even length arrays and data not aligned properly for 64-bit read
+     * and write operations. We thus use a number of different kernels:
+     *
+     *   if (is_even(x.len) && is_even(y.len))
+     *      if (is_align64(x) && is_align64(y) && is_align64(z))
+     *         if (x.len == y.len && 16 <= x.len && x.len <= 64)
+     *            memv_mult_mpmul(...)
+     *         else
+     *            memv_mult_64x64(...)
+     *      else
+     *         memv_mult_64x64u(...)
+     *   else
+     *      memv_mult_32x32(...)
+     *
+     * Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
+     * In case CBCOND instructions are supported, we will use 'cxbX'. If the
+     * MPMUL instruction is supported, we will generate a kernel using 'mpmul'
+     * (for vectors with proper characteristics).
+     */
+    const Register tmp0 = L0;
+    const Register tmp1 = L1;
+
+    Label L_mult_32x32;
+    Label L_mult_64x64u;
+    Label L_mult_64x64;
+    Label L_exit;
+
+    if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
+    if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);
+
+    if (UseMPMUL) {
+      if_eq(xlen, ylen, false, L_mult_64x64);
+      if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);
+
+      // 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
+      //    operating on equal length vectors of size [16..64].
+      gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
+    }
+
+    // 2. Multiply naturally aligned 64-bit datums (64x64).
+    __ bind(L_mult_64x64);
+    gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    // 3. Multiply unaligned 64-bit datums (64x64).
+    __ bind(L_mult_64x64u);
+    gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    // 4. Multiply naturally aligned 32-bit datums (32x32).
+    __ bind(L_mult_32x32);
+    gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+    __ bind(L_exit);
+    __ ret();
+    __ delayed()->restore();
+
+    return start;
+  }
+
+  // Additional help functions used by multiplyToLen generation.
+
+  void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
+  {
+    __ or3(r1, r2, tmp);
+    __ andcc(tmp, 0x1, tmp);
+    __ br_icc_zero(iseven, Assembler::pn, L);
+  }
+
+  void if_all3_aligned(Register r1, Register r2, Register r3,
+                       Register tmp, uint align, bool isalign, Label &L)
+  {
+    __ or3(r1, r2, tmp);
+    __ or3(r3, tmp, tmp);
+    __ andcc(tmp, (align - 1), tmp);
+    __ br_icc_zero(isalign, Assembler::pn, L);
+  }
+
+  void if_eq(Register x, Register y, bool iseq, Label &L)
+  {
+    Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual);
+    __ cmp_and_br_short(x, y, cf, Assembler::pt, L);
+  }
+
+  void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
+  {
+    assert(Assembler::is_simm13(lb), "Small ints only!");
+    assert(Assembler::is_simm13(ub), "Small ints only!");
+    // Compute (x - lb) * (ub - x) >= 0
+    // NOTE: With the local use of this routine, we rely on small integers to
+    //       guarantee that we do not overflow in the multiplication.
+    __ add(G0, ub, t2);
+    __ sub(x, lb, t1);
+    __ sub(t2, x, t2);
+    __ mulx(t1, t2, t1);
+    Assembler::Condition cf = (inrng ? Assembler::greaterEqual : Assembler::less);
+    __ cmp_and_br_short(t1, G0, cf, Assembler::pt, L);
+  }
+
+  void ldd_entry(Register base, Register offs, FloatRegister dest)
+  {
+    __ ldd(base, offs, dest);
+    __ inc(offs, 8);
+  }
+
+  void ldx_entry(Register base, Register offs, Register dest)
+  {
+    __ ldx(base, offs, dest);
+    __ inc(offs, 8);
+  }
+
+  void mpmul_entry(int m, Label &next)
+  {
+    __ mpmul(m);
+    __ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
+  }
+
+  void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
+  {
+    __ bind(L);
+    __ stx(r1, base, offs);
+    __ inc(offs, 8);
+    __ stx(r2, base, offs);
+    __ inc(offs, 8);
+  }
+
+  void offs_entry(Label &Lbl0, Label &Lbl1)
+  {
+    assert(Lbl0.is_bound(), "must be");
+    assert(Lbl1.is_bound(), "must be");
+
+    int offset = Lbl0.loc_pos() - Lbl1.loc_pos();
+
+    __ emit_data(offset);
+  }
+
+  /* Generate the actual multiplication kernels for BigInteger vectors:
+   *
+   *   1. gen_mult_mpmul(...)
+   *
+   *   2. gen_mult_64x64(...)
+   *
+   *   3. gen_mult_64x64_unaligned(...)
+   *
+   *   4. gen_mult_32x32(...)
+   */
+  void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
+                      Label &L_exit)
+  {
+    const Register zero = G0;
+    const Register gxp  = G1;   // Need to use global registers across RWs.
+    const Register gyp  = G2;
+    const Register gzp  = G3;
+    const Register offs = G4;
+    const Register disp = G5;
+
+    __ mov(xptr, gxp);
+    __ mov(yptr, gyp);
+    __ mov(zptr, gzp);
+
+    /* Compute jump vector entry:
+     *
+     *   1. mpmul input size (0..31) x 64b
+     *   2. vector input size in 32b limbs (even number)
+     *   3. branch entries in reverse order (31..0), using two
+     *      instructions per entry (2 * 4 bytes).
+     *
+     *   displacement = byte_offset(bra_offset(len))
+     *                = byte_offset((64 - len)/2)
+     *                = 8 * (64 - len)/2
+     *                = 4 * (64 - len)
+     */
+    Register temp = I5;         // Alright to use input regs. in first batch.
+
+    __ sub(zero, len, temp);
+    __ add(temp, 64, temp);
+    __ sllx(temp, 2, disp);     // disp := (64 - len) << 2
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(temp);
+    __ add(temp, 16, temp);
+    __ jmp(temp, disp);
+    __ delayed()->clr(offs);
+
+    ldd_entry(gxp, offs, F22);
+    ldd_entry(gxp, offs, F20);
+    ldd_entry(gxp, offs, F18);
+    ldd_entry(gxp, offs, F16);
+    ldd_entry(gxp, offs, F14);
+    ldd_entry(gxp, offs, F12);
+    ldd_entry(gxp, offs, F10);
+    ldd_entry(gxp, offs, F8);
+    ldd_entry(gxp, offs, F6);
+    ldd_entry(gxp, offs, F4);
+    ldx_entry(gxp, offs, I5);
+    ldx_entry(gxp, offs, I4);
+    ldx_entry(gxp, offs, I3);
+    ldx_entry(gxp, offs, I2);
+    ldx_entry(gxp, offs, I1);
+    ldx_entry(gxp, offs, I0);
+    ldx_entry(gxp, offs, L7);
+    ldx_entry(gxp, offs, L6);
+    ldx_entry(gxp, offs, L5);
+    ldx_entry(gxp, offs, L4);
+    ldx_entry(gxp, offs, L3);
+    ldx_entry(gxp, offs, L2);
+    ldx_entry(gxp, offs, L1);
+    ldx_entry(gxp, offs, L0);
+    ldd_entry(gxp, offs, F2);
+    ldd_entry(gxp, offs, F0);
+    ldx_entry(gxp, offs, O5);
+    ldx_entry(gxp, offs, O4);
+    ldx_entry(gxp, offs, O3);
+    ldx_entry(gxp, offs, O2);
+    ldx_entry(gxp, offs, O1);
+    ldx_entry(gxp, offs, O0);
+
+    __ save(SP, -176, SP);
+
+    const Register addr = gxp;  // Alright to reuse 'gxp'.
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(addr);
+    __ add(addr, 16, addr);
+    __ jmp(addr, disp);
+    __ delayed()->clr(offs);
+
+    ldd_entry(gyp, offs, F58);
+    ldd_entry(gyp, offs, F56);
+    ldd_entry(gyp, offs, F54);
+    ldd_entry(gyp, offs, F52);
+    ldd_entry(gyp, offs, F50);
+    ldd_entry(gyp, offs, F48);
+    ldd_entry(gyp, offs, F46);
+    ldd_entry(gyp, offs, F44);
+    ldd_entry(gyp, offs, F42);
+    ldd_entry(gyp, offs, F40);
+    ldd_entry(gyp, offs, F38);
+    ldd_entry(gyp, offs, F36);
+    ldd_entry(gyp, offs, F34);
+    ldd_entry(gyp, offs, F32);
+    ldd_entry(gyp, offs, F30);
+    ldd_entry(gyp, offs, F28);
+    ldd_entry(gyp, offs, F26);
+    ldd_entry(gyp, offs, F24);
+    ldx_entry(gyp, offs, O5);
+    ldx_entry(gyp, offs, O4);
+    ldx_entry(gyp, offs, O3);
+    ldx_entry(gyp, offs, O2);
+    ldx_entry(gyp, offs, O1);
+    ldx_entry(gyp, offs, O0);
+    ldx_entry(gyp, offs, L7);
+    ldx_entry(gyp, offs, L6);
+    ldx_entry(gyp, offs, L5);
+    ldx_entry(gyp, offs, L4);
+    ldx_entry(gyp, offs, L3);
+    ldx_entry(gyp, offs, L2);
+    ldx_entry(gyp, offs, L1);
+    ldx_entry(gyp, offs, L0);
+
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+    __ save(SP, -176, SP);
+
+    Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
+    Label L_mpmul_restore_1, L_mpmul_restore_0;
+
+    // Dispatch relative current PC, into instruction table below.
+    __ rdpc(addr);
+    __ add(addr, 16, addr);
+    __ jmp(addr, disp);
+    __ delayed()->clr(offs);
+
+    mpmul_entry(31, L_mpmul_restore_0);
+    mpmul_entry(30, L_mpmul_restore_0);
+    mpmul_entry(29, L_mpmul_restore_0);
+    mpmul_entry(28, L_mpmul_restore_0);
+    mpmul_entry(27, L_mpmul_restore_1);
+    mpmul_entry(26, L_mpmul_restore_1);
+    mpmul_entry(25, L_mpmul_restore_1);
+    mpmul_entry(24, L_mpmul_restore_1);
+    mpmul_entry(23, L_mpmul_restore_1);
+    mpmul_entry(22, L_mpmul_restore_1);
+    mpmul_entry(21, L_mpmul_restore_1);
+    mpmul_entry(20, L_mpmul_restore_2);
+    mpmul_entry(19, L_mpmul_restore_2);
+    mpmul_entry(18, L_mpmul_restore_2);
+    mpmul_entry(17, L_mpmul_restore_2);
+    mpmul_entry(16, L_mpmul_restore_2);
+    mpmul_entry(15, L_mpmul_restore_2);
+    mpmul_entry(14, L_mpmul_restore_2);
+    mpmul_entry(13, L_mpmul_restore_3);
+    mpmul_entry(12, L_mpmul_restore_3);
+    mpmul_entry(11, L_mpmul_restore_3);
+    mpmul_entry(10, L_mpmul_restore_3);
+    mpmul_entry( 9, L_mpmul_restore_3);
+    mpmul_entry( 8, L_mpmul_restore_3);
+    mpmul_entry( 7, L_mpmul_restore_3);
+    mpmul_entry( 6, L_mpmul_restore_4);
+    mpmul_entry( 5, L_mpmul_restore_4);
+    mpmul_entry( 4, L_mpmul_restore_4);
+    mpmul_entry( 3, L_mpmul_restore_4);
+    mpmul_entry( 2, L_mpmul_restore_4);
+    mpmul_entry( 1, L_mpmul_restore_4);
+    mpmul_entry( 0, L_mpmul_restore_4);
+
+    Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
+    Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
+    Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
+    Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
+
+    Label L_zst_base;    // Store sequence base address.
+    __ bind(L_zst_base);
+
+    stx_entry(L_z31, L7, L6, gzp, offs);
+    stx_entry(L_z30, L5, L4, gzp, offs);
+    stx_entry(L_z29, L3, L2, gzp, offs);
+    stx_entry(L_z28, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z27, O5, O4, gzp, offs);
+    stx_entry(L_z26, O3, O2, gzp, offs);
+    stx_entry(L_z25, O1, O0, gzp, offs);
+    stx_entry(L_z24, L7, L6, gzp, offs);
+    stx_entry(L_z23, L5, L4, gzp, offs);
+    stx_entry(L_z22, L3, L2, gzp, offs);
+    stx_entry(L_z21, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z20, O5, O4, gzp, offs);
+    stx_entry(L_z19, O3, O2, gzp, offs);
+    stx_entry(L_z18, O1, O0, gzp, offs);
+    stx_entry(L_z17, L7, L6, gzp, offs);
+    stx_entry(L_z16, L5, L4, gzp, offs);
+    stx_entry(L_z15, L3, L2, gzp, offs);
+    stx_entry(L_z14, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z13, O5, O4, gzp, offs);
+    stx_entry(L_z12, O3, O2, gzp, offs);
+    stx_entry(L_z11, O1, O0, gzp, offs);
+    stx_entry(L_z10, L7, L6, gzp, offs);
+    stx_entry(L_z09, L5, L4, gzp, offs);
+    stx_entry(L_z08, L3, L2, gzp, offs);
+    stx_entry(L_z07, L1, L0, gzp, offs);
+    __ restore();
+    stx_entry(L_z06, O5, O4, gzp, offs);
+    stx_entry(L_z05, O3, O2, gzp, offs);
+    stx_entry(L_z04, O1, O0, gzp, offs);
+    stx_entry(L_z03, L7, L6, gzp, offs);
+    stx_entry(L_z02, L5, L4, gzp, offs);
+    stx_entry(L_z01, L3, L2, gzp, offs);
+    stx_entry(L_z00, L1, L0, gzp, offs);
+
+    __ restore();
+    __ restore();
+    // Exit out of 'mpmul' routine, back to multiplyToLen.
+    __ ba_short(L_exit);
+
+    Label L_zst_offs;
+    __ bind(L_zst_offs);
+
+    offs_entry(L_z31, L_zst_base);  // index 31: 2048x2048
+    offs_entry(L_z30, L_zst_base);
+    offs_entry(L_z29, L_zst_base);
+    offs_entry(L_z28, L_zst_base);
+    offs_entry(L_z27, L_zst_base);
+    offs_entry(L_z26, L_zst_base);
+    offs_entry(L_z25, L_zst_base);
+    offs_entry(L_z24, L_zst_base);
+    offs_entry(L_z23, L_zst_base);
+    offs_entry(L_z22, L_zst_base);
+    offs_entry(L_z21, L_zst_base);
+    offs_entry(L_z20, L_zst_base);
+    offs_entry(L_z19, L_zst_base);
+    offs_entry(L_z18, L_zst_base);
+    offs_entry(L_z17, L_zst_base);
+    offs_entry(L_z16, L_zst_base);
+    offs_entry(L_z15, L_zst_base);
+    offs_entry(L_z14, L_zst_base);
+    offs_entry(L_z13, L_zst_base);
+    offs_entry(L_z12, L_zst_base);
+    offs_entry(L_z11, L_zst_base);
+    offs_entry(L_z10, L_zst_base);
+    offs_entry(L_z09, L_zst_base);
+    offs_entry(L_z08, L_zst_base);
+    offs_entry(L_z07, L_zst_base);
+    offs_entry(L_z06, L_zst_base);
+    offs_entry(L_z05, L_zst_base);
+    offs_entry(L_z04, L_zst_base);
+    offs_entry(L_z03, L_zst_base);
+    offs_entry(L_z02, L_zst_base);
+    offs_entry(L_z01, L_zst_base);
+    offs_entry(L_z00, L_zst_base);  // index  0:   64x64
+
+    __ bind(L_mpmul_restore_4);
+    __ restore();
+    __ bind(L_mpmul_restore_3);
+    __ restore();
+    __ bind(L_mpmul_restore_2);
+    __ restore();
+    __ bind(L_mpmul_restore_1);
+    __ restore();
+    __ bind(L_mpmul_restore_0);
+
+    // Dispatch via offset vector entry, into z-store sequence.
+    Label L_zst_rdpc;
+    __ bind(L_zst_rdpc);
+
+    assert(L_zst_base.is_bound(), "must be");
+    assert(L_zst_offs.is_bound(), "must be");
+    assert(L_zst_rdpc.is_bound(), "must be");
+
+    int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
+    int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();
+
+    temp = gyp;   // Alright to reuse 'gyp'.
+
+    __ rdpc(addr);
+    __ sub(addr, doffs, temp);
+    __ srlx(disp, 1, disp);
+    __ lduw(temp, disp, offs);
+    __ sub(addr, dbase, temp);
+    __ jmp(temp, offs);
+    __ delayed()->clr(offs);
+  }
+
+  void gen_mult_64x64(Register xp, Register xn,
+                      Register yp, Register yn,
+                      Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for immediate use.
+
+    const Register ri = L0;     // Outer loop index, xv[i]
+    const Register rj = L1;     // Inner loop index, yv[j]
+    const Register rk = L2;     // Output loop index, zv[k]
+    const Register rx = L4;     // x-vector datum [i]
+    const Register ry = L5;     // y-vector datum [j]
+    const Register rz = L6;     // z-vector datum [k]
+    const Register rc = L7;     // carry over (to z-vector datum [k-1])
+
+    const Register lop = O0;    // lo-64b product
+    const Register hip = O1;    // hi-64b product
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ srlx(xn, 1, xn);         // index for u32 to u64 ditto
+    __ srlx(yn, 1, yn);         // index for u32 to u64 ditto
+    __ srlx(zn, 1, zn);         // index for u32 to u64 ditto
+    __ dec(xn);                 // Adjust [0..(N/2)-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u64 c = 0
+    __ sllx(xn, 3, ri);         // int i = xn (byte offset i = 8*xn)
+    __ sllx(yn, 3, rj);         // int j = yn (byte offset i = 8*xn)
+    __ sllx(zn, 3, rk);         // int k = zn (byte offset k = 8*zn)
+    __ ldx(yp, rj, ry);         // u64 y = yp[yn]
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ ldx(xp, ri, rx);         // x = xp[i]
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rc, lop, lop);     // Accumulate lower order bits (producing carry)
+    __ addxc(hip, zero, rc);    // carry over to next datum [k-1]
+    __ stx(lop, zp, rk);        // z[k] = lop
+    __ dec(rk, 8);              // k--
+    __ dec(ri, 8);              // i--
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ stx(rc, zp, rk);         // z[k] = c
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 3, rj);         // int j = yn - 1 (byte offset j = 8*yn)
+    __ dec(rj, 8);
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(rj, 0,  // j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u64 c = 0
+    __ ldx(yp, rj, ry);         // u64 y = yp[j]
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ dec(zn);                 // --zn
+    __ sllx(xn, 3, ri);         // int i = xn (byte offset i = 8*xn)
+    __ sllx(zn, 3, rk);         // int k = zn (byte offset k = 8*zn)
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ ldx(xp, ri, rx);         // x = xp[i]
+    __ ldx(zp, rk, rz);         // z = zp[k], accumulator
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rz, rc, rz);       // Accumulate lower order bits,
+    __ addxc(hip, zero, rc);    // Accumulate higher order bits to carry
+    __ addcc(rz, lop, rz);      //    z += lo(p) + c
+    __ addxc(rc, zero, rc);
+    __ stx(rz, zp, rk);         // zp[k] = z
+    __ dec(rk, 8);              // k--
+    __ dec(ri, 8);              // i--
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ stx(rc, zp, rk);         // z[k] = c
+    __ dec(rj, 8);              // j--
+    __ ba_short(L_loop_j);
+  }
+
+  void gen_mult_64x64_unaligned(Register xp, Register xn,
+                                Register yp, Register yn,
+                                Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for use.
+
+    const Register xpc = L0;    // Outer loop cursor, xp[i]
+    const Register ypc = L1;    // Inner loop cursor, yp[j]
+    const Register zpc = L2;    // Output loop cursor, zp[k]
+    const Register rx  = L4;    // x-vector datum [i]
+    const Register ry  = L5;    // y-vector datum [j]
+    const Register rz  = L6;    // z-vector datum [k]
+    const Register rc  = L7;    // carry over (to z-vector datum [k-1])
+    const Register rt  = O2;
+
+    const Register lop = O0;    // lo-64b product
+    const Register hip = O1;    // hi-64b product
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ srlx(xn, 1, xn);         // index for u32 to u64 ditto
+    __ srlx(yn, 1, yn);         // index for u32 to u64 ditto
+    __ srlx(zn, 1, zn);         // index for u32 to u64 ditto
+    __ dec(xn);                 // Adjust [0..(N/2)-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u64 c = 0
+    __ sllx(xn, 3, xpc);        // u32* xpc = &xp[xn] (byte offset 8*xn)
+    __ add(xp, xpc, xpc);
+    __ sllx(yn, 3, ypc);        // u32* ypc = &yp[yn] (byte offset 8*yn)
+    __ add(yp, ypc, ypc);
+    __ sllx(zn, 3, zpc);        // u32* zpc = &zp[zn] (byte offset 8*zn)
+    __ add(zp, zpc, zpc);
+    __ lduw(ypc, 0, rt);        // u64 y = yp[yn]
+    __ lduw(ypc, 4, ry);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, ry, ry);
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(xpc, xp,// i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ lduw(xpc, 0, rt);        // u64 x = xp[i]
+    __ lduw(xpc, 4, rx);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rx, rx);
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rc, lop, lop);     // Accumulate lower order bits (producing carry)
+    __ addxc(hip, zero, rc);    // carry over to next datum [k-1]
+    __ srlx(lop, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = lop
+    __ stw(lop, zpc, 4);        //   ...
+    __ dec(zpc, 8);             // k-- (zpc--)
+    __ dec(xpc, 8);             // i-- (xpc--)
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ srlx(rc, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = c
+    __ stw(rc, zpc, 4);
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 3, ypc);        // u32* ypc = &yp[yn] (byte offset 8*yn)
+    __ add(yp, ypc, ypc);
+    __ dec(ypc, 8);             // yn - 1 (ypc--)
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(ypc, yp,// j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u64 c = 0
+    __ lduw(ypc, 0, rt);        // u64 y = yp[j] (= *ypc)
+    __ lduw(ypc, 4, ry);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, ry, ry);
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ sllx(xn, 3, xpc);        // u32* xpc = &xp[xn] (byte offset 8*xn)
+    __ add(xp, xpc, xpc);
+    __ dec(zn);                 // --zn
+    __ sllx(zn, 3, zpc);        // u32* zpc = &zp[zn] (byte offset 8*zn)
+    __ add(zp, zpc, zpc);
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(xpc, xp,// i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ lduw(xpc, 0, rt);        // u64 x = xp[i] (= *xpc)
+    __ lduw(xpc, 4, rx);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rx, rx);
+
+    __ lduw(zpc, 0, rt);        // u64 z = zp[k] (= *zpc)
+    __ lduw(zpc, 4, rz);        //   ...
+    __ sllx(rt, 32, rt);
+    __ or3(rt, rz, rz);
+
+    __ mulx(rx, ry, lop);       // lo-64b-part of result 64x64
+    __ umulxhi(rx, ry, hip);    // hi-64b-part of result 64x64
+    __ addcc(rz, rc, rz);       // Accumulate lower order bits...
+    __ addxc(hip, zero, rc);    // Accumulate higher order bits to carry
+    __ addcc(rz, lop, rz);      // ... z += lo(p) + c
+    __ addxccc(rc, zero, rc);
+    __ srlx(rz, 32, rt);
+    __ stw(rt, zpc, 0);         // zp[k] = z    (*zpc = z)
+    __ stw(rz, zpc, 4);
+    __ dec(zpc, 8);             // k-- (zpc--)
+    __ dec(xpc, 8);             // i-- (xpc--)
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ srlx(rc, 32, rt);
+    __ stw(rt, zpc, 0);         // z[k] = c
+    __ stw(rc, zpc, 4);
+    __ dec(ypc, 8);             // j-- (ypc--)
+    __ ba_short(L_loop_j);
+  }
+
+  void gen_mult_32x32(Register xp, Register xn,
+                      Register yp, Register yn,
+                      Register zp, Register zn, Label &L_exit)
+  {
+    // Assuming that a stack frame has already been created, i.e. local and
+    // output registers are available for use.
+
+    const Register ri = L0;     // Outer loop index, xv[i]
+    const Register rj = L1;     // Inner loop index, yv[j]
+    const Register rk = L2;     // Output loop index, zv[k]
+    const Register rx = L4;     // x-vector datum [i]
+    const Register ry = L5;     // y-vector datum [j]
+    const Register rz = L6;     // z-vector datum [k]
+    const Register rc = L7;     // carry over (to z-vector datum [k-1])
+
+    const Register p64 = O0;    // 64b product
+    const Register z65 = O1;    // carry+64b accumulator
+    const Register c65 = O2;    // carry at bit 65
+    const Register c33 = O2;    // carry at bit 33 (after shift)
+
+    const Register zero = G0;
+
+    Label L_loop_i,  L_exit_loop_i;
+    Label L_loop_j;
+    Label L_loop_i2, L_exit_loop_i2;
+
+    __ dec(xn);                 // Adjust [0..N-1]
+    __ dec(yn);
+    __ dec(zn);
+    __ clr(rc);                 // u32 c = 0
+    __ sllx(xn, 2, ri);         // int i = xn (byte offset i = 4*xn)
+    __ sllx(yn, 2, rj);         // int j = yn (byte offset i = 4*xn)
+    __ sllx(zn, 2, rk);         // int k = zn (byte offset k = 4*zn)
+    __ lduw(yp, rj, ry);        // u32 y = yp[yn]
+
+    // for (int i = xn; i >= 0; i--)
+    __ bind(L_loop_i);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i);
+    __ lduw(xp, ri, rx);        // x = xp[i]
+    __ mulx(rx, ry, p64);       // 64b result of 32x32
+    __ addcc(rc, p64, z65);     // Accumulate to 65 bits (producing carry)
+    __ addxc(zero, zero, c65);  // Materialise carry (in bit 65) into lsb,
+    __ sllx(c65, 32, c33);      // and shift into bit 33
+    __ srlx(z65, 32, rc);       // carry = c33 | hi(z65) >> 32
+    __ add(c33, rc, rc);        // carry over to next datum [k-1]
+    __ stw(z65, zp, rk);        // z[k] = lo(z65)
+    __ dec(rk, 4);              // k--
+    __ dec(ri, 4);              // i--
+    __ ba_short(L_loop_i);
+
+    __ bind(L_exit_loop_i);
+    __ stw(rc, zp, rk);         // z[k] = c
+
+    // for (int j = yn - 1; j >= 0; j--)
+    __ sllx(yn, 2, rj);         // int j = yn - 1 (byte offset j = 4*yn)
+    __ dec(rj, 4);
+
+    __ bind(L_loop_j);
+
+    __ cmp_and_br_short(rj, 0,  // j >= 0
+                        Assembler::less, Assembler::pn, L_exit);
+    __ clr(rc);                 // u32 c = 0
+    __ lduw(yp, rj, ry);        // u32 y = yp[j]
+
+    // for (int i = xn, k = --zn; i >= 0; i--)
+    __ dec(zn);                 // --zn
+    __ sllx(xn, 2, ri);         // int i = xn (byte offset i = 4*xn)
+    __ sllx(zn, 2, rk);         // int k = zn (byte offset k = 4*zn)
+
+    __ bind(L_loop_i2);
+
+    __ cmp_and_br_short(ri, 0,  // i >= 0
+                        Assembler::less, Assembler::pn, L_exit_loop_i2);
+    __ lduw(xp, ri, rx);        // x = xp[i]
+    __ lduw(zp, rk, rz);        // z = zp[k], accumulator
+    __ mulx(rx, ry, p64);       // 64b result of 32x32
+    __ add(rz, rc, rz);         // Accumulate lower order bits,
+    __ addcc(rz, p64, z65);     //   z += lo(p64) + c
+    __ addxc(zero, zero, c65);  // Materialise carry (in bit 65) into lsb,
+    __ sllx(c65, 32, c33);      // and shift into bit 33
+    __ srlx(z65, 32, rc);       // carry = c33 | hi(z65) >> 32
+    __ add(c33, rc, rc);        // carry over to next datum [k-1]
+    __ stw(z65, zp, rk);        // zp[k] = lo(z65)
+    __ dec(rk, 4);              // k--
+    __ dec(ri, 4);              // i--
+    __ ba_short(L_loop_i2);
+
+    __ bind(L_exit_loop_i2);
+    __ stw(rc, zp, rk);         // z[k] = c
+    __ dec(rj, 4);              // j--
+    __ ba_short(L_loop_j);
+  }
+
+
  void generate_initial() {
    // Generates all stubs and initializes the entry points

@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
    if (UseAdler32Intrinsics) {
      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
    }
-  }

+#ifdef COMPILER2
+    // Intrinsics supported by C2 only:
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+#endif // COMPILER2
+  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
--- a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp
+++ b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp
@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc)   {
 enum /* platform_dependent_constants */ {
  // %%%%%%%% May be able to shrink this a lot
  code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 27000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 29000            // simply increase if too small (assembler will crash if too small)
 };

 class Sparc {
--- a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp
+++ b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp
@ -101,6 +101,14 @@
  declare_constant(VM_Version::ISA_XMONT)               \
  declare_constant(VM_Version::ISA_PAUSE_NSEC)          \
  declare_constant(VM_Version::ISA_VAMASK)              \
+  declare_constant(VM_Version::ISA_SPARC6)              \
+  declare_constant(VM_Version::ISA_DICTUNP)             \
+  declare_constant(VM_Version::ISA_FPCMPSHL)            \
+  declare_constant(VM_Version::ISA_RLE)                 \
+  declare_constant(VM_Version::ISA_SHA3)                \
+  declare_constant(VM_Version::ISA_VIS3C)               \
+  declare_constant(VM_Version::ISA_SPARC5B)             \
+  declare_constant(VM_Version::ISA_MME)                 \
  declare_constant(VM_Version::CPU_FAST_IDIV)           \
  declare_constant(VM_Version::CPU_FAST_RDPC)           \
  declare_constant(VM_Version::CPU_FAST_BIS)            \
--- a/src/hotspot/cpu/sparc/vm_version_sparc.cpp
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.cpp
@ -103,7 +103,7 @@ void VM_Version::initialize() {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
    }
    else if (has_sparc5()) {
-      // Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
+      // Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
      // also use prefetch style 3.
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@ -128,7 +128,7 @@ void VM_Version::initialize() {

  // We increase the number of prefetched cache lines, to use just a bit more
  // aggressive approach, when the L2-cache line size is small (32 bytes), or
-  // when running on newer processor implementations, such as the Core S4.
+  // when running on newer processor implementations, such as the Core C4.
  bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());

  if (inc_prefetch) {
@ -168,6 +168,16 @@ void VM_Version::initialize() {
    FLAG_SET_DEFAULT(UseCBCond, false);
  }

+  // Use 'mpmul' instruction if available.
+  if (has_mpmul()) {
+    if (FLAG_IS_DEFAULT(UseMPMUL)) {
+      FLAG_SET_DEFAULT(UseMPMUL, true);
+    }
+  } else if (UseMPMUL) {
+    warning("MPMUL instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseMPMUL, false);
+  }
+
  assert(BlockZeroingLowLimit > 0, "invalid value");

  if (has_blk_zeroing() && cache_line_size > 0) {
@ -208,7 +218,9 @@ void VM_Version::initialize() {

  char buf[512];
  jio_snprintf(buf, sizeof(buf),
-               "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
+               "%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
+               "%s%s%s%s%s%s%s",
               (has_v9()          ? "v9" : ""),
               (has_popc()        ? ", popc" : ""),
               (has_vis1()        ? ", vis1" : ""),
@ -241,6 +253,16 @@ void VM_Version::initialize() {
               (has_pause_nsec()  ? ", pause_nsec" : ""),
               (has_vamask()      ? ", vamask" : ""),

+               (has_sparc6()      ? ", sparc6" : ""),
+               (has_dictunp()     ? ", dictunp" : ""),
+               (has_fpcmpshl()    ? ", fpcmpshl" : ""),
+               (has_rle()         ? ", rle" : ""),
+               (has_sha3()        ? ", sha3" : ""),
+               (has_athena_plus2()? ", athena_plus2" : ""),
+               (has_vis3c()       ? ", vis3c" : ""),
+               (has_sparc5b()     ? ", sparc5b" : ""),
+               (has_mme()         ? ", mme" : ""),
+
               (has_fast_idiv()   ? ", *idiv" : ""),
               (has_fast_rdpc()   ? ", *rdpc" : ""),
               (has_fast_bis()    ? ", *bis" : ""),
@ -409,6 +431,15 @@ void VM_Version::initialize() {
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

+  if (UseVIS > 2) {
+    if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+      FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+    }
+  } else if (UseMultiplyToLenIntrinsic) {
+    warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
+  }
+
  if (UseVectorizedMismatchIntrinsic) {
    warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
--- a/src/hotspot/cpu/sparc/vm_version_sparc.hpp
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.hpp
@ -67,6 +67,16 @@ protected:
    ISA_PAUSE_NSEC,
    ISA_VAMASK,

+    ISA_SPARC6,
+    ISA_DICTUNP,
+    ISA_FPCMPSHL,
+    ISA_RLE,
+    ISA_SHA3,
+    ISA_FJATHPLUS2,
+    ISA_VIS3C,
+    ISA_SPARC5B,
+    ISA_MME,
+
    // Synthesised properties:

    CPU_FAST_IDIV,
@ -79,7 +89,7 @@ protected:
  };

 private:
-  enum { ISA_last_feature = ISA_VAMASK,
+  enum { ISA_last_feature = ISA_MME,
         CPU_last_feature = CPU_BLK_ZEROING };

  enum {
@ -119,6 +129,16 @@ private:
    ISA_pause_nsec_msk  = UINT64_C(1) << ISA_PAUSE_NSEC,
    ISA_vamask_msk      = UINT64_C(1) << ISA_VAMASK,

+    ISA_sparc6_msk      = UINT64_C(1) << ISA_SPARC6,
+    ISA_dictunp_msk     = UINT64_C(1) << ISA_DICTUNP,
+    ISA_fpcmpshl_msk    = UINT64_C(1) << ISA_FPCMPSHL,
+    ISA_rle_msk         = UINT64_C(1) << ISA_RLE,
+    ISA_sha3_msk        = UINT64_C(1) << ISA_SHA3,
+    ISA_fjathplus2_msk  = UINT64_C(1) << ISA_FJATHPLUS2,
+    ISA_vis3c_msk       = UINT64_C(1) << ISA_VIS3C,
+    ISA_sparc5b_msk     = UINT64_C(1) << ISA_SPARC5B,
+    ISA_mme_msk         = UINT64_C(1) << ISA_MME,
+
    CPU_fast_idiv_msk   = UINT64_C(1) << CPU_FAST_IDIV,
    CPU_fast_rdpc_msk   = UINT64_C(1) << CPU_FAST_RDPC,
    CPU_fast_bis_msk    = UINT64_C(1) << CPU_FAST_BIS,
@ -153,40 +173,51 @@ private:
 *  UltraSPARC T2+:    (Victoria Falls, etc.)
 *    SPARC-V9, VIS, VIS2, ASI_BIS, POPC    (Crypto/hash in SPU)
 *
- *  UltraSPARC T3:     (Rainbow Falls/S2)
+ *  UltraSPARC T3:     (Rainbow Falls/C2)
 *    SPARC-V9, VIS, VIS2, ASI_BIS, POPC    (Crypto/hash in SPU)
 *
- *  Oracle SPARC T4/T5/M5:  (Core S3)
+ *  Oracle SPARC T4/T5/M5:  (Core C3)
 *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
 *    AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
 *
- *  Oracle SPARC M7:   (Core S4)
+ *  Oracle SPARC M7:   (Core C4)
 *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
 *    AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
 *    ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
 *
+ *  Oracle SPARC M8:   (Core C5)
+ *    SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
+ *    AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
+ *    ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
+ *    DICTUNP, RLE, SHA3, MME
+ *
+ *    NOTE: Oracle Number support ignored.
 */
  enum {
    niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
    niagara2_msk = niagara1_msk | ISA_popc_msk,

-    core_S2_msk  = niagara2_msk | ISA_vis2_msk,
+    core_C2_msk  = niagara2_msk | ISA_vis2_msk,

-    core_S3_msk  = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
+    core_C3_msk  = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
        ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
        ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
        ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
        ISA_cbcond_msk | ISA_crc32c_msk,

-    core_S4_msk  = core_S3_msk - ISA_kasumi_msk |
+    core_C4_msk  = core_C3_msk - ISA_kasumi_msk |
        ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
        ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,

+    core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
+        ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
+
    ultra_sparc_t1_msk = niagara1_msk,
    ultra_sparc_t2_msk = niagara2_msk,
-    ultra_sparc_t3_msk = core_S2_msk,
-    ultra_sparc_m5_msk = core_S3_msk,   // NOTE: First out-of-order pipeline.
-    ultra_sparc_m7_msk = core_S4_msk
+    ultra_sparc_t3_msk = core_C2_msk,
+    ultra_sparc_m5_msk = core_C3_msk,   // NOTE: First out-of-order pipeline.
+    ultra_sparc_m7_msk = core_C4_msk,
+    ultra_sparc_m8_msk = core_C5_msk
  };

  static uint _L2_data_cache_line_size;
@ -247,6 +278,16 @@ public:
  static bool has_pause_nsec()   { return (_features & ISA_pause_nsec_msk) != 0; }
  static bool has_vamask()       { return (_features & ISA_vamask_msk) != 0; }

+  static bool has_sparc6()       { return (_features & ISA_sparc6_msk) != 0; }
+  static bool has_dictunp()      { return (_features & ISA_dictunp_msk) != 0; }
+  static bool has_fpcmpshl()     { return (_features & ISA_fpcmpshl_msk) != 0; }
+  static bool has_rle()          { return (_features & ISA_rle_msk) != 0; }
+  static bool has_sha3()         { return (_features & ISA_sha3_msk) != 0; }
+  static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
+  static bool has_vis3c()        { return (_features & ISA_vis3c_msk) != 0; }
+  static bool has_sparc5b()      { return (_features & ISA_sparc5b_msk) != 0; }
+  static bool has_mme()          { return (_features & ISA_mme_msk) != 0; }
+
  static bool has_fast_idiv()    { return (_features & CPU_fast_idiv_msk) != 0; }
  static bool has_fast_rdpc()    { return (_features & CPU_fast_rdpc_msk) != 0; }
  static bool has_fast_bis()     { return (_features & CPU_fast_bis_msk) != 0; }
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@ -770,8 +770,15 @@ static void *thread_native_entry(Thread *thread) {
  const pthread_t pthread_id = ::pthread_self();
  const tid_t kernel_thread_id = ::thread_self();

-  log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
-    os::current_thread_id(), (uintx) kernel_thread_id);
+  LogTarget(Info, os, thread) lt;
+  if (lt.is_enabled()) {
+    address low_address = thread->stack_end();
+    address high_address = thread->stack_base();
+    lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
+             ", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
+             os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
+             (high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
+  }

  // Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
  // by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@ -864,6 +871,14 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
  // Calculate stack size if it's not specified by caller.
  size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);

+  // JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
+  // the real thread stack size may be smaller than the requested stack size, by as much as 64K.
+  // This very much looks like a pthread lib error. As a workaround, increase the stack size
+  // by 64K for small thread stacks (arbitrarily choosen to be < 4MB)
+  if (stack_size < 4096 * K) {
+    stack_size += 64 * K;
+  }
+
  // On Aix, pthread_attr_setstacksize fails with huge values and leaves the
  // thread size in attr unchanged. If this is the minimal stack size as set
  // by pthread_attr_init this leads to crashes after thread creation. E.g. the
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@ -428,7 +428,7 @@ static unsigned __stdcall thread_native_entry(Thread* thread) {
  // When the VMThread gets here, the main thread may have already exited
  // which frees the CodeHeap containing the Atomic::add code
  if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
-    Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
+    Atomic::dec(&os::win32::_os_thread_count);
  }

  // If a thread has not deleted itself ("delete this") as part of its
@ -634,7 +634,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
    return NULL;
  }

-  Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
+  Atomic::inc(&os::win32::_os_thread_count);

  // Store info on the Win32 thread into the OSThread
  osthread->set_thread_handle(thread_handle);
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
@ -149,83 +149,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
 }


-inline void Atomic::inc    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
@ -61,24 +61,6 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
  return old_value;
 }

-inline void Atomic::inc    (volatile jint*     dest) {
-  __asm__ volatile (  "lock addl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  __asm__ volatile (  "lock subl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
  __asm__ volatile (  "xchgl (%2),%0"
                    : "=r" (exchange_value)
@ -136,20 +118,6 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
  return old_value;
 }

-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ (  "lock addq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ (  "lock subq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  __asm__ __volatile__ ("xchgq (%2),%0"
                        : "=r" (exchange_value)
@ -176,14 +144,6 @@ inline jlong Atomic::load(const volatile jlong* src) { return *src; }

 #else // !AMD64

-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
 }
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
@ -207,30 +207,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
  return __sync_add_and_fetch(dest, add_value);
 }

-inline void Atomic::inc(volatile jint* dest) {
-  add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  add_ptr(-1, dest);
-}
-
 inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
 #ifdef ARM
  return arm_lock_test_and_set(dest, exchange_value);
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
@ -57,26 +57,6 @@ struct Atomic::PlatformAdd
  }
 };

-inline void Atomic::inc(volatile jint* dest)
-{
- add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec (volatile jint* dest)
-{
- add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest)
-{
- add_ptr(-1, dest);
-}
-
 inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
 {
  jint res = __sync_lock_test_and_set (dest, exchange_value);
@ -110,16 +90,6 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
 inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
 inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }

-inline void Atomic::inc_ptr(volatile intptr_t* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest)
-{
- add_ptr(-1, dest);
-}
-
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
 {
  intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
@ -122,14 +122,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
 #endif
 }

-inline void Atomic::inc(volatile jint* dest) {
-  Atomic::add(1, (volatile jint *)dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  Atomic::add(-1, (volatile jint *)dest);
-}
-
 #ifdef AARCH64
 template<>
 template<typename I, typename D>
@ -151,23 +143,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
 }
 #endif // AARCH64

-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  Atomic::add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  Atomic::add_ptr(-1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
-
 inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
 #ifdef AARCH64
  jint old_val;
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
+++ b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
@ -146,84 +146,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
  return result;
 }

-
-inline void Atomic::inc    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0,  1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec    (volatile jint*     dest) {
-
-  unsigned int temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: lwarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stwcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
-  long temp;
-
-  __asm__ __volatile__ (
-    strasm_nobarrier
-    "1: ldarx   %0,  0, %2    \n"
-    "   addic   %0, %0, -1    \n"
-    "   stdcx.  %0,  0, %2    \n"
-    "   bne-    1b            \n"
-    strasm_nobarrier
-    : /*%0*/"=&r" (temp), "=m" (*dest)
-    : /*%2*/"r" (dest), "m" (*dest)
-    : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
+++ b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
@ -192,219 +192,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest) const {
 }


-//------------
-// Atomic::inc
-//------------
-// These methods force the value in memory to be incremented (augmented by 1).
-// Both, memory value and increment, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::inc(volatile jint* dest) {
-  unsigned int old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-//  tty->print_cr("Atomic::inc     called... dest @%p", dest);
-    __asm__ __volatile__ (
-      "   LGHI     2,1                     \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xf8                    \n\t" // LAA minor opcode
-      "   AGHI     2,1                     \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LLGF     %[old],%[mem]           \n\t" // get old value
-      "0: LA       %[upd],1(,%[old])       \n\t" // calc result
-      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  unsigned long old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,1                     \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xe8                    \n\t" // LAA minor opcode
-      "   AGHI     2,1                     \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LG       %[old],%[mem]           \n\t" // get old value
-      "0: LA       %[upd],1(,%[old])       \n\t" // calc result
-      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-//------------
-// Atomic::dec
-//------------
-// These methods force the value in memory to be decremented (augmented by -1).
-// Both, memory value and decrement, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::dec(volatile jint* dest) {
-  unsigned int old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,-1                    \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAA      %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAA      2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xf8                    \n\t" // LAA minor opcode
-      "   AGHI     2,-1                    \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LLGF     %[old],%[mem]           \n\t" // get old value
-  // LAY not supported by inline assembler
-  //  "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
-      "0: LR       %[upd],%[old]           \n\t" // calc result
-      "   AHI      %[upd],-1               \n\t"
-      "   CS       %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  unsigned long old, upd;
-
-  if (VM_Version::has_LoadAndALUAtomicV1()) {
-    __asm__ __volatile__ (
-      "   LGHI     2,-1                    \n\t" // load increment
-      "   LA       3,%[mem]                \n\t" // force data address into ARG2
-//    "   LAAG     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
-//    "   LAAG     2,2,0(3)                \n\t" // actually coded instruction
-      "   .byte    0xeb                    \n\t" // LAA main opcode
-      "   .byte    0x22                    \n\t" // R1,R3
-      "   .byte    0x30                    \n\t" // R2,disp1
-      "   .byte    0x00                    \n\t" // disp2,disp3
-      "   .byte    0x00                    \n\t" // disp4,disp5
-      "   .byte    0xe8                    \n\t" // LAA minor opcode
-      "   AGHI     2,-1                    \n\t" // calc new value in register
-      "   LR       %[upd],2                \n\t" // move to result register
-      //---<  outputs  >---
-      : [upd]  "=&d" (upd)    // write-only, updated counter value
-      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-//    : [inc]  "a"   (inc)    // read-only.
-      //---<  clobbered  >---
-      : "cc", "r2", "r3", "memory"
-    );
-  } else {
-    __asm__ __volatile__ (
-      "   LG       %[old],%[mem]           \n\t" // get old value
-//    LAY not supported by inline assembler
-//    "0: LAY      %[upd],-1(,%[old])      \n\t" // calc result
-      "0: LGR      %[upd],%[old]           \n\t" // calc result
-      "   AGHI     %[upd],-1               \n\t"
-      "   CSG      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
-      "   JNE      0b                      \n\t" // no success? -> retry
-      //---<  outputs  >---
-      : [old] "=&a" (old)    // write-only, old counter value
-      , [upd] "=&d" (upd)    // write-only, updated counter value
-      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
-      //---<  inputs  >---
-      :
-      //---<  clobbered  >---
-      : "cc", "memory"
-    );
-  }
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 //-------------
 // Atomic::xchg
 //-------------
--- a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp
+++ b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp
@ -41,14 +41,6 @@ inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *
 inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
 inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }

 template<size_t byte_size>
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
+++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
@ -61,24 +61,6 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
  return old_value;
 }

-inline void Atomic::inc    (volatile jint*     dest) {
-  __asm__ volatile (  "lock addl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  __asm__ volatile (  "lock subl $1,(%0)" :
-                    : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec_ptr((volatile intptr_t*)dest);
-}
-
 inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
  __asm__ volatile (  "xchgl (%2),%0"
                    : "=r" (exchange_value)
@ -136,20 +118,6 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
  return old_value;
 }

-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ ("lock addq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  __asm__ __volatile__ ("lock subq $1,(%0)"
-                        :
-                        : "r" (dest)
-                        : "cc", "memory");
-}
-
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  __asm__ __volatile__ ("xchgq (%2),%0"
                        : "=r" (exchange_value)
@ -176,14 +144,6 @@ inline jlong Atomic::load(const volatile jlong* src) { return *src; }

 #else // !AMD64

-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
 }
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
+++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
@ -201,30 +201,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
  return __sync_add_and_fetch(dest, add_value);
 }

-inline void Atomic::inc(volatile jint* dest) {
-  add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
-  add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
-  add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
-  add_ptr(-1, dest);
-}
-
 inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
 #ifdef ARM
  return arm_lock_test_and_set(dest, exchange_value);
--- a/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp
+++ b/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp
@ -56,8 +56,16 @@ typedef void (__kernel_dmb_t) (void);

 #else // PPC

+#ifdef ALPHA
+
+#define LIGHT_MEM_BARRIER __sync_synchronize()
+
+#else // ALPHA
+
 #define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")

+#endif // ALPHA
+
 #endif // PPC

 #endif // ARM
--- a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp
+++ b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp
@ -39,15 +39,6 @@ inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *
 inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
 inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
-
 inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
 inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
--- a/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp
+++ b/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp
@ -380,7 +380,7 @@ void VM_Version::platform_features() {
  if (av & AV_SPARC_CRC32C)       features |= ISA_crc32c_msk;

 #ifndef AV2_SPARC_FJATHPLUS
-#define AV2_SPARC_FJATHPLUS  0x00000001 // Fujitsu Athena+
+#define AV2_SPARC_FJATHPLUS  0x00000001 // Fujitsu Athena+ insns
 #endif
 #ifndef AV2_SPARC_VIS3B
 #define AV2_SPARC_VIS3B      0x00000002 // VIS3 present on multiple chips
@ -405,6 +405,34 @@ void VM_Version::platform_features() {
 #endif
 #ifndef AV2_SPARC_VAMASK
 #define AV2_SPARC_VAMASK     0x00000100 // Virtual Address masking
+#endif
+
+#ifndef AV2_SPARC_SPARC6
+#define AV2_SPARC_SPARC6     0x00000200 // REVB*, FPSLL*, RDENTROPY, LDM* and STM*
+#endif
+#ifndef AV2_SPARC_DICTUNP
+#define AV2_SPARC_DICTUNP    0x00002000 // Dictionary unpack instruction
+#endif
+#ifndef AV2_SPARC_FPCMPSHL
+#define AV2_SPARC_FPCMPSHL   0x00004000 // Partition compare with shifted result
+#endif
+#ifndef AV2_SPARC_RLE
+#define AV2_SPARC_RLE        0x00008000 // Run-length encoded burst and length
+#endif
+#ifndef AV2_SPARC_SHA3
+#define AV2_SPARC_SHA3       0x00010000 // SHA3 instructions
+#endif
+#ifndef AV2_SPARC_FJATHPLUS2
+#define AV2_SPARC_FJATHPLUS2 0x00020000 // Fujitsu Athena++ insns
+#endif
+#ifndef AV2_SPARC_VIS3C
+#define AV2_SPARC_VIS3C      0x00040000 // Subset of VIS3 insns provided by Athena++
+#endif
+#ifndef AV2_SPARC_SPARC5B
+#define AV2_SPARC_SPARC5B    0x00080000 // subset of SPARC5 insns (fpadd8, fpsub8)
+#endif
+#ifndef AV2_SPARC_MME
+#define AV2_SPARC_MME        0x00100000 // Misaligned Mitigation Enable
 #endif

  if (avn > 1) {
@ -419,19 +447,30 @@ void VM_Version::platform_features() {
    if (av2 & AV2_SPARC_XMONT)      features |= ISA_xmont_msk;
    if (av2 & AV2_SPARC_PAUSE_NSEC) features |= ISA_pause_nsec_msk;
    if (av2 & AV2_SPARC_VAMASK)     features |= ISA_vamask_msk;
+
+    if (av2 & AV2_SPARC_SPARC6)     features |= ISA_sparc6_msk;
+    if (av2 & AV2_SPARC_DICTUNP)    features |= ISA_dictunp_msk;
+    if (av2 & AV2_SPARC_FPCMPSHL)   features |= ISA_fpcmpshl_msk;
+    if (av2 & AV2_SPARC_RLE)        features |= ISA_rle_msk;
+    if (av2 & AV2_SPARC_SHA3)       features |= ISA_sha3_msk;
+    if (av2 & AV2_SPARC_FJATHPLUS2) features |= ISA_fjathplus2_msk;
+    if (av2 & AV2_SPARC_VIS3C)      features |= ISA_vis3c_msk;
+    if (av2 & AV2_SPARC_SPARC5B)    features |= ISA_sparc5b_msk;
+    if (av2 & AV2_SPARC_MME)        features |= ISA_mme_msk;
  }

  _features = features;     // ISA feature set completed, update state.

  Sysinfo machine(SI_MACHINE);

-  bool is_sun4v = machine.match("sun4v");   // All Oracle SPARC + Fujitsu Athena+
+  bool is_sun4v = machine.match("sun4v");   // All Oracle SPARC + Fujitsu Athena+/++
  bool is_sun4u = machine.match("sun4u");   // All other Fujitsu

-  // Handle Athena+ conservatively (simply because we are lacking info.).
+  // Handle Athena+/++ conservatively (simply because we are lacking info.).

-  bool do_sun4v = is_sun4v && !has_athena_plus();
-  bool do_sun4u = is_sun4u ||  has_athena_plus();
+  bool an_athena = has_athena_plus() || has_athena_plus2();
+  bool do_sun4v  = is_sun4v && !an_athena;
+  bool do_sun4u  = is_sun4u ||  an_athena;

  uint64_t synthetic = 0;

@ -441,16 +480,16 @@ void VM_Version::platform_features() {
    // Fast IDIV, BIS and LD available on Niagara Plus.
    if (has_vis2()) {
      synthetic |= (CPU_fast_idiv_msk | CPU_fast_ld_msk);
-      // ...on Core S4 however, we prefer not to use BIS.
+      // ...on Core C4 however, we prefer not to use BIS.
      if (!has_sparc5()) {
        synthetic |= CPU_fast_bis_msk;
      }
    }
-    // Niagara Core S3 supports fast RDPC and block zeroing.
+    // SPARC Core C3 supports fast RDPC and block zeroing.
    if (has_ima()) {
      synthetic |= (CPU_fast_rdpc_msk | CPU_blk_zeroing_msk);
    }
-    // Niagara Core S3 and S4 have slow CMOVE.
+    // SPARC Core C3 and C4 have slow CMOVE.
    if (!has_ima()) {
      synthetic |= CPU_fast_cmove_msk;
    }
--- a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp
+++ b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp
@ -39,14 +39,6 @@ inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *
 inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
 inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

-inline void Atomic::inc    (volatile jint*     dest) { (void)add    (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void*     dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec    (volatile jint*     dest) { (void)add    (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
-
 // For Sun Studio - implementation is in solaris_x86_64.il.

 extern "C" {
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
@ -81,30 +81,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
  return add_using_helper<intptr_t>(os::atomic_add_ptr_func, add_value, dest);
 }

-inline void Atomic::inc    (volatile jint*     dest) {
-  (void)add    (1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  (void)add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  (void)add_ptr(1, dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  (void)add    (-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  (void)add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  (void)add_ptr(-1, dest);
-}
-
 inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
  return (jint)(*os::atomic_xchg_func)(exchange_value, dest);
 }
@ -152,38 +128,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
  }
 }

-inline void Atomic::inc    (volatile jint*     dest) {
-  // alternative for InterlockedIncrement
-  __asm {
-    mov edx, dest;
-    lock add dword ptr [edx], 1;
-  }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::inc_ptr(volatile void*     dest) {
-  inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec    (volatile jint*     dest) {
-  // alternative for InterlockedDecrement
-  __asm {
-    mov edx, dest;
-    lock sub dword ptr [edx], 1;
-  }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-  dec((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void*     dest) {
-  dec((volatile jint*)dest);
-}
-
 inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
  // alternative for InterlockedExchange
  __asm {
--- a/src/hotspot/share/ci/ciInstanceKlass.cpp
+++ b/src/hotspot/share/ci/ciInstanceKlass.cpp
@ -665,9 +665,8 @@ class StaticFinalFieldPrinter : public FieldClosure {
            _out->print_cr("null");
          } else if (value->is_instance()) {
            if (value->is_a(SystemDictionary::String_klass())) {
-              _out->print("\"");
-              _out->print_raw(java_lang_String::as_quoted_ascii(value));
-              _out->print_cr("\"");
+              const char* ascii_value = java_lang_String::as_quoted_ascii(value);
+              _out->print("\"%s\"", (ascii_value != NULL) ? ascii_value : "");
            } else {
              const char* klass_name  = value->klass()->name()->as_quoted_ascii();
              _out->print_cr("%s", klass_name);
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@ -802,7 +802,6 @@ void ClassLoader::setup_search_path(const char *class_path, bool bootstrap_searc
          if (DumpSharedSpaces) {
            JImageFile *jimage = _jrt_entry->jimage();
            assert(jimage != NULL, "No java runtime image file present");
-            ClassLoader::initialize_module_loader_map(jimage);
          }
 #endif
        }
@ -1144,61 +1143,6 @@ int ClassLoader::crc32(int crc, const char* buf, int len) {
  return (*Crc32)(crc, (const jbyte*)buf, len);
 }

-#if INCLUDE_CDS
-void ClassLoader::initialize_module_loader_map(JImageFile* jimage) {
-  if (!DumpSharedSpaces) {
-    return; // only needed for CDS dump time
-  }
-
-  ResourceMark rm;
-  jlong size;
-  JImageLocationRef location = (*JImageFindResource)(jimage, JAVA_BASE_NAME, get_jimage_version_string(), MODULE_LOADER_MAP, &size);
-  if (location == 0) {
-    vm_exit_during_initialization(
-      "Cannot find ModuleLoaderMap location from modules jimage.", NULL);
-  }
-  char* buffer = NEW_RESOURCE_ARRAY(char, size + 1);
-  buffer[size] = '\0';
-  jlong read = (*JImageGetResource)(jimage, location, buffer, size);
-  if (read != size) {
-    vm_exit_during_initialization(
-      "Cannot find ModuleLoaderMap resource from modules jimage.", NULL);
-  }
-  char* char_buf = (char*)buffer;
-  int buflen = (int)strlen(char_buf);
-  char* begin_ptr = char_buf;
-  char* end_ptr = strchr(begin_ptr, '\n');
-  bool process_boot_modules = false;
-  _boot_modules_array = new (ResourceObj::C_HEAP, mtModule)
-    GrowableArray<char*>(INITIAL_BOOT_MODULES_ARRAY_SIZE, true);
-  _platform_modules_array = new (ResourceObj::C_HEAP, mtModule)
-    GrowableArray<char*>(INITIAL_PLATFORM_MODULES_ARRAY_SIZE, true);
-  while (end_ptr != NULL && (end_ptr - char_buf) < buflen) {
-    // Allocate a buffer from the C heap to be appended to the _boot_modules_array
-    // or the _platform_modules_array.
-    char* temp_name = NEW_C_HEAP_ARRAY(char, (size_t)(end_ptr - begin_ptr + 1), mtInternal);
-    strncpy(temp_name, begin_ptr, end_ptr - begin_ptr);
-    temp_name[end_ptr - begin_ptr] = '\0';
-    if (strncmp(temp_name, "BOOT", 4) == 0) {
-      process_boot_modules = true;
-      FREE_C_HEAP_ARRAY(char, temp_name);
-    } else if (strncmp(temp_name, "PLATFORM", 8) == 0) {
-      process_boot_modules = false;
-      FREE_C_HEAP_ARRAY(char, temp_name);
-    } else {
-      // module name
-      if (process_boot_modules) {
-        _boot_modules_array->append(temp_name);
-      } else {
-        _platform_modules_array->append(temp_name);
-      }
-    }
-    begin_ptr = ++end_ptr;
-    end_ptr = strchr(begin_ptr, '\n');
-  }
-}
-#endif
-
 // Function add_package extracts the package from the fully qualified class name
 // and checks if the package is in the boot loader's package entry table.  If so,
 // then it sets the classpath_index in the package entry record.
@ -1290,58 +1234,6 @@ objArrayOop ClassLoader::get_system_packages(TRAPS) {
  return result();
 }

-#if INCLUDE_CDS
-s2 ClassLoader::module_to_classloader(const char* module_name) {
-
-  assert(DumpSharedSpaces, "dump time only");
-  assert(_boot_modules_array != NULL, "_boot_modules_array is NULL");
-  assert(_platform_modules_array != NULL, "_platform_modules_array is NULL");
-
-  int array_size = _boot_modules_array->length();
-  for (int i = 0; i < array_size; i++) {
-    if (strcmp(module_name, _boot_modules_array->at(i)) == 0) {
-      return BOOT_LOADER;
-    }
-  }
-
-  array_size = _platform_modules_array->length();
-  for (int i = 0; i < array_size; i++) {
-    if (strcmp(module_name, _platform_modules_array->at(i)) == 0) {
-      return PLATFORM_LOADER;
-    }
-  }
-
-  return APP_LOADER;
-}
-
-s2 ClassLoader::classloader_type(Symbol* class_name, ClassPathEntry* e, int classpath_index, TRAPS) {
-  assert(DumpSharedSpaces, "Only used for CDS dump time");
-
-  // obtain the classloader type based on the class name.
-  // First obtain the package name based on the class name. Then obtain
-  // the classloader type based on the package name from the jimage using
-  // a jimage API. If the classloader type cannot be found from the
-  // jimage, it is determined by the class path entry.
-  jshort loader_type = ClassLoader::APP_LOADER;
-  if (e->is_jrt()) {
-    ResourceMark rm;
-    TempNewSymbol pkg_name = InstanceKlass::package_from_name(class_name, CHECK_0);
-    if (pkg_name != NULL) {
-      const char* pkg_name_C_string = (const char*)(pkg_name->as_C_string());
-      ClassPathImageEntry* cpie = (ClassPathImageEntry*)e;
-      JImageFile* jimage = cpie->jimage();
-      char* module_name = (char*)(*JImagePackageToModule)(jimage, pkg_name_C_string);
-      if (module_name != NULL) {
-        loader_type = ClassLoader::module_to_classloader(module_name);
-      }
-    }
-  } else if (ClassLoaderExt::is_boot_classpath(classpath_index)) {
-    loader_type = ClassLoader::BOOT_LOADER;
-  }
-  return loader_type;
-}
-#endif
-
 // caller needs ResourceMark
 const char* ClassLoader::file_name_for_class_name(const char* class_name,
                                                  int class_name_len) {
--- a/src/hotspot/share/classfile/classLoader.hpp
+++ b/src/hotspot/share/classfile/classLoader.hpp
@ -37,13 +37,6 @@
 // Name of boot "modules" image
 #define  MODULES_IMAGE_NAME "modules"

-// Name of the resource containing mapping from module names to defining class loader type
-#define MODULE_LOADER_MAP "jdk/internal/vm/cds/resources/ModuleLoaderMap.dat"
-
-// Initial sizes of the following arrays are based on the generated ModuleLoaderMap.dat
-#define INITIAL_BOOT_MODULES_ARRAY_SIZE 30
-#define INITIAL_PLATFORM_MODULES_ARRAY_SIZE  15
-
 // Class path entry (directory or zip file)

 class JImageFile;
@ -403,7 +396,8 @@ class ClassLoader: AllStatic {
  static int compute_Object_vtable();

  static ClassPathEntry* classpath_entry(int n) {
-    assert(n >= 0 && n < _num_entries, "sanity");
+    assert(n >= 0, "sanity");
+    assert(!has_jrt_entry() || n < _num_entries, "sanity");
    if (n == 0) {
      assert(has_jrt_entry(), "No class path entry at 0 for exploded module builds");
      return ClassLoader::_jrt_entry;
@ -438,10 +432,6 @@ class ClassLoader: AllStatic {
  static bool  check_shared_paths_misc_info(void* info, int size);
  static void  exit_with_path_failure(const char* error, const char* message);

-  static s2 module_to_classloader(const char* module_name);
-  static void initialize_module_loader_map(JImageFile* jimage);
-  static s2 classloader_type(Symbol* class_name, ClassPathEntry* e,
-                             int classpath_index, TRAPS);
  static void record_shared_class_loader_type(InstanceKlass* ik, const ClassFileStream* stream);
 #endif
  static JImageLocationRef jimage_find_resource(JImageFile* jf, const char* module_name,
--- a/src/hotspot/share/classfile/defaultMethods.cpp
+++ b/src/hotspot/share/classfile/defaultMethods.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -767,15 +767,14 @@ static void create_default_methods( InstanceKlass* klass,
 // This is the guts of the default methods implementation.  This is called just
 // after the classfile has been parsed if some ancestor has default methods.
 //
-// First if finds any name/signature slots that need any implementation (either
+// First it finds any name/signature slots that need any implementation (either
 // because they are miranda or a superclass's implementation is an overpass
 // itself).  For each slot, iterate over the hierarchy, to see if they contain a
 // signature that matches the slot we are looking at.
 //
-// For each slot filled, we generate an overpass method that either calls the
-// unique default method candidate using invokespecial, or throws an exception
-// (in the case of no default method candidates, or more than one valid
-// candidate).  These methods are then added to the class's method list.
+// For each slot filled, we either record the default method candidate in the
+// klass default_methods list or, only to handle exception cases, we create an
+// overpass method that throws an exception and add it to the klass methods list.
 // The JVM does not create bridges nor handle generic signatures here.
 void DefaultMethods::generate_default_methods(
    InstanceKlass* klass, const GrowableArray<Method*>* mirandas, TRAPS) {
@ -901,6 +900,11 @@ static void switchover_constant_pool(BytecodeConstantPool* bpool,
 // This allows virtual methods to override the overpass, but ensures
 // that a local method search will find the exception rather than an abstract
 // or default method that is not a valid candidate.
+//
+// Note that if overpass method are ever created that are not exception
+// throwing methods then the loader constraint checking logic for vtable and
+// itable creation needs to be changed to check loader constraints for the
+// overpass methods that do not throw exceptions.
 static void create_defaults_and_exceptions(
    GrowableArray<EmptyVtableSlot*>* slots,
    InstanceKlass* klass, TRAPS) {
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@ -461,6 +461,8 @@
  template(getProtectionDomain_signature,             "(Ljava/security/CodeSource;)Ljava/security/ProtectionDomain;") \
  template(url_code_signer_array_void_signature,      "(Ljava/net/URL;[Ljava/security/CodeSigner;)V") \
  template(module_entry_name,                         "module_entry")                             \
+  template(resolved_references_name,                  "<resolved_references>")                    \
+  template(init_lock_name,                            "<init_lock>")                              \
                                                                                                  \
  /* name symbols needed by intrinsics */                                                         \
  VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, template, VM_SYMBOL_IGNORE, VM_ALIAS_IGNORE) \
--- a/src/hotspot/share/compiler/oopMap.cpp
+++ b/src/hotspot/share/compiler/oopMap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -40,9 +40,6 @@
 #ifdef COMPILER2
 #include "opto/optoreg.hpp"
 #endif
-#ifdef SPARC
-#include "vmreg_sparc.inline.hpp"
-#endif

 // OopMapStream

--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
@ -1075,7 +1075,7 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
                        obj_ptr, old->is_objArray(), word_sz);

  NOT_PRODUCT(
-    Atomic::inc_ptr(&_numObjectsPromoted);
+    Atomic::inc(&_numObjectsPromoted);
    Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
  )

@ -7974,7 +7974,7 @@ void CMSCollector::push_on_overflow_list(oop p) {

 // Multi-threaded; use CAS to prepend to overflow list
 void CMSCollector::par_push_on_overflow_list(oop p) {
-  NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
+  NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
  assert(oopDesc::is_oop(p), "Not an oop");
  par_preserve_mark_if_necessary(p);
  oop observed_overflow_list = _overflow_list;
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp
@ -1281,7 +1281,7 @@ void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadSt
    // XXX This is horribly inefficient when a promotion failure occurs
    // and should be fixed. XXX FIX ME !!!
 #ifndef PRODUCT
-    Atomic::inc_ptr(&_num_par_pushes);
+    Atomic::inc(&_num_par_pushes);
    assert(_num_par_pushes > 0, "Tautology");
 #endif
    if (from_space_obj->forwardee() == from_space_obj) {
--- a/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -90,7 +90,7 @@ void G1StringDedupQueue::push(uint worker_id, oop java_string) {
    }
  } else {
    // Queue is full, drop the string and update the statistics
-    Atomic::inc_ptr(&_queue->_dropped);
+    Atomic::inc(&_queue->_dropped);
  }
 }

--- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
+++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -89,7 +89,7 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size)
    const idx_t end_bit = addr_to_bit(addr + size - 1);
    bool end_bit_ok = _end_bits.par_set_bit(end_bit);
    assert(end_bit_ok, "concurrency problem");
-    DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count));
+    DEBUG_ONLY(Atomic::inc(&mark_bitmap_count));
    DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size));
    return true;
  }
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -574,16 +574,10 @@ void ParallelScavengeHeap::print_gc_threads_on(outputStream* st) const {
 }

 void ParallelScavengeHeap::print_tracing_info() const {
-  if (TraceYoungGenTime) {
-    double time = PSScavenge::accumulated_time()->seconds();
-    tty->print_cr("[Accumulated GC generation 0 time %3.7f secs]", time);
-  }
-  if (TraceOldGenTime) {
-    double time = UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds();
-    tty->print_cr("[Accumulated GC generation 1 time %3.7f secs]", time);
-  }
-
  AdaptiveSizePolicyOutput::print();
+  log_debug(gc, heap, exit)("Accumulated young generation GC time %3.7f secs", PSScavenge::accumulated_time()->seconds());
+  log_debug(gc, heap, exit)("Accumulated old generation GC time %3.7f secs",
+      UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds());
 }


--- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp
+++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp
@ -173,7 +173,9 @@ bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
    TraceCollectorStats tcs(counters());
    TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);

-    if (TraceOldGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }

    // Let the size policy know we're starting
    size_policy->major_collection_begin();
@ -342,7 +344,9 @@ bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
    // We collected the heap, recalculate the metaspace capacity
    MetaspaceGC::compute_new_size();

-    if (TraceOldGenTime) accumulated_time()->stop();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->stop();
+    }

    young_gen->print_used_change(young_gen_prev_used);
    old_gen->print_used_change(old_gen_prev_used);
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@ -520,7 +520,7 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
  const size_t beg_region = obj_ofs >> Log2RegionSize;
  const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;

-  DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
+  DEBUG_ONLY(Atomic::inc(&add_obj_count);)
  DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)

  if (beg_region == end_region) {
@ -1778,7 +1778,9 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    TraceCollectorStats tcs(counters());
    TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);

-    if (TraceOldGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }

    // Let the size policy know we're starting
    size_policy->major_collection_begin();
@ -1897,7 +1899,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    // Resize the metaspace capacity after a collection
    MetaspaceGC::compute_new_size();

-    if (TraceOldGenTime) {
+    if (log_is_enabled(Debug, gc, heap, exit)) {
      accumulated_time()->stop();
    }

--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
@ -517,7 +517,7 @@ ParallelCompactData::RegionData::set_blocks_filled()
  OrderAccess::release();
  _blocks_filled = true;
  // Debug builds count the number of times the table was filled.
-  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
+  DEBUG_ONLY(Atomic::inc(&_blocks_filled_count));
 }

 inline void
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp
@ -306,7 +306,9 @@ bool PSScavenge::invoke_no_policy() {
    TraceCollectorStats tcs(counters());
    TraceMemoryManagerStats tms(false /* not full GC */,gc_cause);

-    if (TraceYoungGenTime) accumulated_time()->start();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->start();
+    }

    // Let the size policy know we're starting
    size_policy->minor_collection_begin();
@ -607,7 +609,9 @@ bool PSScavenge::invoke_no_policy() {
      CardTableExtension::verify_all_young_refs_imprecise();
    }

-    if (TraceYoungGenTime) accumulated_time()->stop();
+    if (log_is_enabled(Debug, gc, heap, exit)) {
+      accumulated_time()->stop();
+    }

    young_gen->print_used_change(pre_gc_values.young_gen_used());
    old_gen->print_used_change(pre_gc_values.old_gen_used());
--- a/src/hotspot/share/gc/shared/genCollectedHeap.cpp
+++ b/src/hotspot/share/gc/shared/genCollectedHeap.cpp
@ -1157,11 +1157,10 @@ void GenCollectedHeap::print_on_error(outputStream* st) const {
 }

 void GenCollectedHeap::print_tracing_info() const {
-  if (TraceYoungGenTime) {
-    _young_gen->print_summary_info();
-  }
-  if (TraceOldGenTime) {
-    _old_gen->print_summary_info();
+  if (log_is_enabled(Debug, gc, heap, exit)) {
+    LogStreamHandle(Debug, gc, heap, exit) lsh;
+    _young_gen->print_summary_info_on(&lsh);
+    _old_gen->print_summary_info_on(&lsh);
  }
 }

--- a/src/hotspot/share/gc/shared/generation.cpp
+++ b/src/hotspot/share/gc/shared/generation.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -94,22 +94,14 @@ void Generation::print_on(outputStream* st)  const {
              p2i(_virtual_space.high_boundary()));
 }

-void Generation::print_summary_info() { print_summary_info_on(tty); }
-
 void Generation::print_summary_info_on(outputStream* st) {
  StatRecord* sr = stat_record();
  double time = sr->accumulated_time.seconds();
-  // I didn't want to change the logging when removing the level concept,
-  // but I guess this logging could say young/old or something instead of 0/1.
-  uint level;
-  if (GenCollectedHeap::heap()->is_young_gen(this)) {
-    level = 0;
-  } else {
-    level = 1;
-  }
-  st->print_cr("[Accumulated GC generation %d time %3.7f secs, "
-               "%u GC's, avg GC time %3.7f]",
-               level, time, sr->invocations,
+  st->print_cr("Accumulated %s generation GC time %3.7f secs, "
+               "%u GC's, avg GC time %3.7f",
+               GenCollectedHeap::heap()->is_young_gen(this) ? "young" : "old" ,
+               time,
+               sr->invocations,
               sr->invocations > 0 ? time / sr->invocations : 0.0);
 }

--- a/src/hotspot/share/gc/shared/generation.hpp
+++ b/src/hotspot/share/gc/shared/generation.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -549,7 +549,6 @@ private:
 public:
  StatRecord* stat_record() { return &_stat_record; }

-  virtual void print_summary_info();
  virtual void print_summary_info_on(outputStream* st);

  // Performance Counter support
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp
@ -272,7 +272,7 @@ ReferenceProcessorPhaseTimes::~ReferenceProcessorPhaseTimes() {

 double ReferenceProcessorPhaseTimes::ref_proc_time_ms(ReferenceType ref_type) const {
  ASSERT_REF_TYPE(ref_type);
-  return _par_phase_time_ms[ref_type_2_index(ref_type)];
+  return _ref_proc_time_ms[ref_type_2_index(ref_type)];
 }

 void ReferenceProcessorPhaseTimes::set_ref_proc_time_ms(ReferenceType ref_type,
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@ -761,6 +761,14 @@
  declare_constant(VM_Version::ISA_XMONT)               \
  declare_constant(VM_Version::ISA_PAUSE_NSEC)          \
  declare_constant(VM_Version::ISA_VAMASK)              \
+  declare_constant(VM_Version::ISA_SPARC6)              \
+  declare_constant(VM_Version::ISA_DICTUNP)             \
+  declare_constant(VM_Version::ISA_FPCMPSHL)            \
+  declare_constant(VM_Version::ISA_RLE)                 \
+  declare_constant(VM_Version::ISA_SHA3)                \
+  declare_constant(VM_Version::ISA_VIS3C)               \
+  declare_constant(VM_Version::ISA_SPARC5B)             \
+  declare_constant(VM_Version::ISA_MME)                 \
  declare_constant(VM_Version::CPU_FAST_IDIV)           \
  declare_constant(VM_Version::CPU_FAST_RDPC)           \
  declare_constant(VM_Version::CPU_FAST_BIS)            \
--- a/src/hotspot/share/memory/metaspace.cpp
+++ b/src/hotspot/share/memory/metaspace.cpp
@ -1291,7 +1291,7 @@ VirtualSpaceList::VirtualSpaceList(ReservedSpace rs) :
 }

 size_t VirtualSpaceList::free_bytes() {
-  return virtual_space_list()->free_words_in_vs() * BytesPerWord;
+  return current_virtual_space()->free_words_in_vs() * BytesPerWord;
 }

 // Allocate another meta virtual space and add it to the list.
@ -2718,7 +2718,7 @@ void SpaceManager::dump(outputStream* const out) const {


 size_t MetaspaceAux::_capacity_words[] = {0, 0};
-size_t MetaspaceAux::_used_words[] = {0, 0};
+volatile size_t MetaspaceAux::_used_words[] = {0, 0};

 size_t MetaspaceAux::free_bytes(Metaspace::MetadataType mdtype) {
  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
--- a/src/hotspot/share/memory/metaspace.hpp
+++ b/src/hotspot/share/memory/metaspace.hpp
@ -273,7 +273,7 @@ class MetaspaceAux : AllStatic {
  // Running sum of space in all Metachunks that
  // are being used for metadata. One for each
  // type of Metadata.
-  static size_t _used_words[Metaspace:: MetadataTypeCount];
+  static volatile size_t _used_words[Metaspace:: MetadataTypeCount];

 public:
  // Decrement and increment _allocated_capacity_words
--- a/src/hotspot/share/memory/resourceArea.hpp
+++ b/src/hotspot/share/memory/resourceArea.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -49,11 +49,11 @@ class ResourceArea: public Arena {
  debug_only(static int _warned;)       // to suppress multiple warnings

 public:
-  ResourceArea() : Arena(mtThread) {
+  ResourceArea(MEMFLAGS flags = mtThread) : Arena(flags) {
    debug_only(_nesting = 0;)
  }

-  ResourceArea(size_t init_size) : Arena(mtThread, init_size) {
+  ResourceArea(size_t init_size, MEMFLAGS flags = mtThread) : Arena(flags, init_size) {
    debug_only(_nesting = 0;);
  }

--- a/src/hotspot/share/memory/universe.cpp
+++ b/src/hotspot/share/memory/universe.cpp
@ -1064,44 +1064,40 @@ bool universe_post_init() {

  Universe::_vm_exception = InstanceKlass::cast(k)->allocate_instance(CHECK_false);

-  if (!DumpSharedSpaces) {
-    // These are the only Java fields that are currently set during shared space dumping.
-    // We prefer to not handle this generally, so we always reinitialize these detail messages.
-    Handle msg = java_lang_String::create_from_str("Java heap space", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_java_heap, msg());
+  Handle msg = java_lang_String::create_from_str("Java heap space", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_java_heap, msg());

-    msg = java_lang_String::create_from_str("Metaspace", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_metaspace, msg());
-    msg = java_lang_String::create_from_str("Compressed class space", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_class_metaspace, msg());
+  msg = java_lang_String::create_from_str("Metaspace", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_metaspace, msg());
+  msg = java_lang_String::create_from_str("Compressed class space", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_class_metaspace, msg());

-    msg = java_lang_String::create_from_str("Requested array size exceeds VM limit", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_array_size, msg());
+  msg = java_lang_String::create_from_str("Requested array size exceeds VM limit", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_array_size, msg());

-    msg = java_lang_String::create_from_str("GC overhead limit exceeded", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_gc_overhead_limit, msg());
+  msg = java_lang_String::create_from_str("GC overhead limit exceeded", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_gc_overhead_limit, msg());

-    msg = java_lang_String::create_from_str("Java heap space: failed reallocation of scalar replaced objects", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_out_of_memory_error_realloc_objects, msg());
+  msg = java_lang_String::create_from_str("Java heap space: failed reallocation of scalar replaced objects", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_out_of_memory_error_realloc_objects, msg());

-    msg = java_lang_String::create_from_str("/ by zero", CHECK_false);
-    java_lang_Throwable::set_message(Universe::_arithmetic_exception_instance, msg());
+  msg = java_lang_String::create_from_str("/ by zero", CHECK_false);
+  java_lang_Throwable::set_message(Universe::_arithmetic_exception_instance, msg());

-    // Setup the array of errors that have preallocated backtrace
-    k = Universe::_out_of_memory_error_java_heap->klass();
-    assert(k->name() == vmSymbols::java_lang_OutOfMemoryError(), "should be out of memory error");
-    ik = InstanceKlass::cast(k);
+  // Setup the array of errors that have preallocated backtrace
+  k = Universe::_out_of_memory_error_java_heap->klass();
+  assert(k->name() == vmSymbols::java_lang_OutOfMemoryError(), "should be out of memory error");
+  ik = InstanceKlass::cast(k);

-    int len = (StackTraceInThrowable) ? (int)PreallocatedOutOfMemoryErrorCount : 0;
-    Universe::_preallocated_out_of_memory_error_array = oopFactory::new_objArray(ik, len, CHECK_false);
-    for (int i=0; i<len; i++) {
-      oop err = ik->allocate_instance(CHECK_false);
-      Handle err_h = Handle(THREAD, err);
-      java_lang_Throwable::allocate_backtrace(err_h, CHECK_false);
-      Universe::preallocated_out_of_memory_errors()->obj_at_put(i, err_h());
-    }
-    Universe::_preallocated_out_of_memory_error_avail_count = (jint)len;
+  int len = (StackTraceInThrowable) ? (int)PreallocatedOutOfMemoryErrorCount : 0;
+  Universe::_preallocated_out_of_memory_error_array = oopFactory::new_objArray(ik, len, CHECK_false);
+  for (int i=0; i<len; i++) {
+    oop err = ik->allocate_instance(CHECK_false);
+    Handle err_h = Handle(THREAD, err);
+    java_lang_Throwable::allocate_backtrace(err_h, CHECK_false);
+    Universe::preallocated_out_of_memory_errors()->obj_at_put(i, err_h());
  }
+  Universe::_preallocated_out_of_memory_error_avail_count = (jint)len;

  Universe::initialize_known_methods(CHECK_false);

--- a/src/hotspot/share/oops/constantPool.cpp
+++ b/src/hotspot/share/oops/constantPool.cpp
@ -135,6 +135,16 @@ objArrayOop ConstantPool::resolved_references() const {
  return (objArrayOop)_cache->resolved_references();
 }

+// Called from outside constant pool resolution where a resolved_reference array
+// may not be present.
+objArrayOop ConstantPool::resolved_references_or_null() const {
+  if (_cache == NULL) {
+    return NULL;
+  } else {
+    return (objArrayOop)_cache->resolved_references();
+  }
+}
+
 // Create resolved_references array and mapping array for original cp indexes
 // The ldc bytecode was rewritten to have the resolved reference array index so need a way
 // to map it back for resolving and some unlikely miscellaneous uses.
@ -284,6 +294,28 @@ void ConstantPool::archive_resolved_references(Thread* THREAD) {
    set_resolved_references(NULL);
  }
 }
+
+void ConstantPool::resolve_class_constants(TRAPS) {
+  assert(DumpSharedSpaces, "used during dump time only");
+  // The _cache may be NULL if the _pool_holder klass fails verification
+  // at dump time due to missing dependencies.
+  if (cache() == NULL || reference_map() == NULL) {
+    return; // nothing to do
+  }
+
+  constantPoolHandle cp(THREAD, this);
+  for (int index = 1; index < length(); index++) { // Index 0 is unused
+    if (tag_at(index).is_string()) {
+      Symbol* sym = cp->unresolved_string_at(index);
+      // Look up only. Only resolve references to already interned strings.
+      oop str = StringTable::lookup(sym);
+      if (str != NULL) {
+        int cache_index = cp->cp_to_object_index(index);
+        cp->string_at_put(index, cache_index, str);
+      }
+    }
+  }
+}
 #endif

 // CDS support. Create a new resolved_references array.
@ -712,22 +744,6 @@ void ConstantPool::resolve_string_constants_impl(const constantPoolHandle& this_
  }
 }

-bool ConstantPool::resolve_class_constants(TRAPS) {
-  constantPoolHandle cp(THREAD, this);
-  for (int index = 1; index < length(); index++) { // Index 0 is unused
-    if (tag_at(index).is_string()) {
-      Symbol* sym = cp->unresolved_string_at(index);
-      // Look up only. Only resolve references to already interned strings.
-      oop str = StringTable::lookup(sym);
-      if (str != NULL) {
-        int cache_index = cp->cp_to_object_index(index);
-        cp->string_at_put(index, cache_index, str);
-      }
-    }
-  }
-  return true;
-}
-
 Symbol* ConstantPool::exception_message(const constantPoolHandle& this_cp, int which, constantTag tag, oop pending_exception) {
  // Dig out the detailed message to reuse if possible
  Symbol* message = java_lang_Throwable::detail_message(pending_exception);
--- a/src/hotspot/share/oops/constantPool.hpp
+++ b/src/hotspot/share/oops/constantPool.hpp
@ -226,6 +226,7 @@ class ConstantPool : public Metadata {

  // resolved strings, methodHandles and callsite objects from the constant pool
  objArrayOop resolved_references()  const;
+  objArrayOop resolved_references_or_null()  const;
  // mapping resolved object array indexes to cp indexes and back.
  int object_to_cp_index(int index)         { return reference_map()->at(index); }
  int cp_to_object_index(int index);
@ -716,9 +717,9 @@ class ConstantPool : public Metadata {

  // CDS support
  void archive_resolved_references(Thread *THREAD) NOT_CDS_JAVA_HEAP_RETURN;
+  void resolve_class_constants(TRAPS) NOT_CDS_JAVA_HEAP_RETURN;
  void remove_unshareable_info();
  void restore_unshareable_info(TRAPS);
-  bool resolve_class_constants(TRAPS);
  // The ConstantPool vtable is restored by this call when the ConstantPool is
  // in the shared archive.  See patch_klass_vtables() in metaspaceShared.cpp for
  // all the gory details.  SA, dtrace and pstack helpers distinguish metadata
--- a/src/hotspot/share/oops/klassVtable.cpp
+++ b/src/hotspot/share/oops/klassVtable.cpp
@ -479,13 +479,15 @@ bool klassVtable::update_inherited_vtable(InstanceKlass* klass, const methodHand
          allocate_new = false;
        }

-        if (checkconstraints) {
-        // Override vtable entry if passes loader constraint check
-        // if loader constraint checking requested
-        // No need to visit his super, since he and his super
-        // have already made any needed loader constraints.
-        // Since loader constraints are transitive, it is enough
-        // to link to the first super, and we get all the others.
+        // Do not check loader constraints for overpass methods because overpass
+        // methods are created by the jvm to throw exceptions.
+        if (checkconstraints && !target_method()->is_overpass()) {
+          // Override vtable entry if passes loader constraint check
+          // if loader constraint checking requested
+          // No need to visit his super, since he and his super
+          // have already made any needed loader constraints.
+          // Since loader constraints are transitive, it is enough
+          // to link to the first super, and we get all the others.
          Handle super_loader(THREAD, super_klass->class_loader());

          if (target_loader() != super_loader()) {
@ -495,21 +497,23 @@ bool klassVtable::update_inherited_vtable(InstanceKlass* klass, const methodHand
                                                        super_loader, true,
                                                        CHECK_(false));
            if (failed_type_symbol != NULL) {
-              const char* msg = "loader constraint violation: when resolving "
-                "overridden method \"%s\" the class loader (instance"
-                " of %s) of the current class, %s, and its superclass loader "
-                "(instance of %s), have different Class objects for the type "
-                "%s used in the signature";
+              const char* msg = "loader constraint violation for class %s: when selecting "
+                "overriding method \"%s\" the class loader (instance of %s) of the "
+                "selected method's type %s, and the class loader (instance of %s) for its super "
+                "type %s have different Class objects for the type %s used in the signature";
+              char* curr_class = klass->name()->as_C_string();
              char* sig = target_method()->name_and_sig_as_C_string();
              const char* loader1 = SystemDictionary::loader_name(target_loader());
-              char* current = target_klass->name()->as_C_string();
+              char* sel_class = target_klass->name()->as_C_string();
              const char* loader2 = SystemDictionary::loader_name(super_loader());
+              char* super_class = super_klass->name()->as_C_string();
              char* failed_type_name = failed_type_symbol->as_C_string();
-              size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-                strlen(current) + strlen(loader2) + strlen(failed_type_name);
+              size_t buflen = strlen(msg) + strlen(curr_class) + strlen(sig) +
+                strlen(loader1) + strlen(sel_class) + strlen(loader2) +
+                strlen(super_class) + strlen(failed_type_name);
              char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-              jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                           failed_type_name);
+              jio_snprintf(buf, buflen, msg, curr_class, sig, loader1, sel_class, loader2,
+                           super_class, failed_type_name);
              THROW_MSG_(vmSymbols::java_lang_LinkageError(), buf, false);
            }
          }
@ -1193,13 +1197,15 @@ void klassItable::initialize_itable_for_interface(int method_table_offset, Klass
      // to correctly enforce loader constraints for interface method inheritance
      target = LinkResolver::lookup_instance_method_in_klasses(_klass, m->name(), m->signature(), CHECK);
    }
-    if (target == NULL || !target->is_public() || target->is_abstract()) {
-      // Entry does not resolve. Leave it empty for AbstractMethodError.
-        if (!(target == NULL) && !target->is_public()) {
-          // Stuff an IllegalAccessError throwing method in there instead.
-          itableOffsetEntry::method_entry(_klass, method_table_offset)[m->itable_index()].
-              initialize(Universe::throw_illegal_access_error());
-        }
+    if (target == NULL || !target->is_public() || target->is_abstract() || target->is_overpass()) {
+      assert(target == NULL || !target->is_overpass() || target->is_public(),
+             "Non-public overpass method!");
+      // Entry does not resolve. Leave it empty for AbstractMethodError or other error.
+      if (!(target == NULL) && !target->is_public()) {
+        // Stuff an IllegalAccessError throwing method in there instead.
+        itableOffsetEntry::method_entry(_klass, method_table_offset)[m->itable_index()].
+            initialize(Universe::throw_illegal_access_error());
+      }
    } else {
      // Entry did resolve, check loader constraints before initializing
      // if checkconstraints requested
@ -1213,24 +1219,24 @@ void klassItable::initialize_itable_for_interface(int method_table_offset, Klass
                                                      interface_loader,
                                                      true, CHECK);
          if (failed_type_symbol != NULL) {
-            const char* msg = "loader constraint violation in interface "
-              "itable initialization: when resolving method \"%s\" the class"
-              " loader (instance of %s) of the current class, %s, "
-              "and the class loader (instance of %s) for interface "
-              "%s have different Class objects for the type %s "
-              "used in the signature";
-            char* sig = target()->name_and_sig_as_C_string();
-            const char* loader1 = SystemDictionary::loader_name(method_holder_loader());
+            const char* msg = "loader constraint violation in interface itable"
+              " initialization for class %s: when selecting method \"%s\" the"
+              " class loader (instance of %s) for super interface %s, and the class"
+              " loader (instance of %s) of the selected method's type, %s have"
+              " different Class objects for the type %s used in the signature";
            char* current = _klass->name()->as_C_string();
-            const char* loader2 = SystemDictionary::loader_name(interface_loader());
+            char* sig = m->name_and_sig_as_C_string();
+            const char* loader1 = SystemDictionary::loader_name(interface_loader());
            char* iface = InstanceKlass::cast(interf)->name()->as_C_string();
+            const char* loader2 = SystemDictionary::loader_name(method_holder_loader());
+            char* mclass = target()->method_holder()->name()->as_C_string();
            char* failed_type_name = failed_type_symbol->as_C_string();
-            size_t buflen = strlen(msg) + strlen(sig) + strlen(loader1) +
-              strlen(current) + strlen(loader2) + strlen(iface) +
+            size_t buflen = strlen(msg) + strlen(current) + strlen(sig) +
+              strlen(loader1) + strlen(iface) + strlen(loader2) + strlen(mclass) +
              strlen(failed_type_name);
            char* buf = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, buflen);
-            jio_snprintf(buf, buflen, msg, sig, loader1, current, loader2,
-                         iface, failed_type_name);
+            jio_snprintf(buf, buflen, msg, current, sig, loader1, iface,
+                         loader2, mclass, failed_type_name);
            THROW_MSG(vmSymbols::java_lang_LinkageError(), buf);
          }
        }
--- a/src/hotspot/share/opto/bytecodeInfo.cpp
+++ b/src/hotspot/share/opto/bytecodeInfo.cpp
@ -644,7 +644,8 @@ InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, J
      C->log()->elem("inline_level_discount caller='%d' callee='%d'", id1, id2);
    }
  }
-  InlineTree* ilt = new InlineTree(C, this, callee_method, caller_jvms, caller_bci, recur_frequency, _max_inline_level + max_inline_level_adjust);
+  // Allocate in the comp_arena to make sure the InlineTree is live when dumping a replay compilation file
+  InlineTree* ilt = new (C->comp_arena()) InlineTree(C, this, callee_method, caller_jvms, caller_bci, recur_frequency, _max_inline_level + max_inline_level_adjust);
  _subtrees.append(ilt);

  NOT_PRODUCT( _count_inlines += 1; )
--- a/src/hotspot/share/opto/chaitin.cpp
+++ b/src/hotspot/share/opto/chaitin.cpp
@ -348,8 +348,8 @@ void PhaseChaitin::Register_Allocate() {
  _alternate = 0;
  _matcher._allocation_started = true;

-  ResourceArea split_arena;     // Arena for Split local resources
-  ResourceArea live_arena;      // Arena for liveness & IFG info
+  ResourceArea split_arena(mtCompiler);     // Arena for Split local resources
+  ResourceArea live_arena(mtCompiler);      // Arena for liveness & IFG info
  ResourceMark rm(&live_arena);

  // Need live-ness for the IFG; need the IFG for coalescing.  If the
--- a/src/hotspot/share/opto/gcm.cpp
+++ b/src/hotspot/share/opto/gcm.cpp
@ -1424,7 +1424,7 @@ void PhaseCFG::global_code_motion() {
  // Enabling the scheduler for register pressure plus finding blocks of size to schedule for it
  // is key to enabling this feature.
  PhaseChaitin regalloc(C->unique(), *this, _matcher, true);
-  ResourceArea live_arena;      // Arena for liveness
+  ResourceArea live_arena(mtCompiler);      // Arena for liveness
  ResourceMark rm_live(&live_arena);
  PhaseLive live(*this, regalloc._lrg_map.names(), &live_arena, true);
  PhaseIFG ifg(&live_arena);
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@ -69,7 +69,7 @@ Matcher::Matcher()
  _register_save_type(register_save_type),
  _ruleName(ruleName),
  _allocation_started(false),
-  _states_arena(Chunk::medium_size),
+  _states_arena(Chunk::medium_size, mtCompiler),
  _visited(&_states_arena),
  _shared(&_states_arena),
  _dontcare(&_states_arena) {
--- a/src/hotspot/share/runtime/atomic.hpp
+++ b/src/hotspot/share/runtime/atomic.hpp
@ -97,21 +97,21 @@ class Atomic : AllStatic {
    return add(add_value, reinterpret_cast<char* volatile*>(dest));
  }

-  // Atomically increment location. inc*() provide:
+  // Atomically increment location. inc() provide:
  // <fence> increment-dest <membar StoreLoad|StoreStore>
-  inline static void inc    (volatile jint*     dest);
-  inline static void inc    (volatile jshort*   dest);
-  inline static void inc    (volatile size_t*   dest);
-  inline static void inc_ptr(volatile intptr_t* dest);
-  inline static void inc_ptr(volatile void*     dest);
+  // The type D may be either a pointer type, or an integral
+  // type. If it is a pointer type, then the increment is
+  // scaled to the size of the type pointed to by the pointer.
+  template<typename D>
+  inline static void inc(D volatile* dest);

-  // Atomically decrement a location. dec*() provide:
+  // Atomically decrement a location. dec() provide:
  // <fence> decrement-dest <membar StoreLoad|StoreStore>
-  inline static void dec    (volatile jint*     dest);
-  inline static void dec    (volatile jshort*   dest);
-  inline static void dec    (volatile size_t*   dest);
-  inline static void dec_ptr(volatile intptr_t* dest);
-  inline static void dec_ptr(volatile void*     dest);
+  // The type D may be either a pointer type, or an integral
+  // type. If it is a pointer type, then the decrement is
+  // scaled to the size of the type pointed to by the pointer.
+  template<typename D>
+  inline static void dec(D volatile* dest);

  // Performs atomic exchange of *dest with exchange_value. Returns old
  // prior value of *dest. xchg*() provide:
@ -312,6 +312,22 @@ struct Atomic::AddAndFetch VALUE_OBJ_CLASS_SPEC {
  D operator()(I add_value, D volatile* dest) const;
 };

+template<typename D>
+inline void Atomic::inc(D volatile* dest) {
+  STATIC_ASSERT(IsPointer<D>::value || IsIntegral<D>::value);
+  typedef typename Conditional<IsPointer<D>::value, ptrdiff_t, D>::type I;
+  Atomic::add(I(1), dest);
+}
+
+template<typename D>
+inline void Atomic::dec(D volatile* dest) {
+  STATIC_ASSERT(IsPointer<D>::value || IsIntegral<D>::value);
+  typedef typename Conditional<IsPointer<D>::value, ptrdiff_t, D>::type I;
+  // Assumes two's complement integer representation.
+  #pragma warning(suppress: 4146)
+  Atomic::add(I(-1), dest);
+}
+
 // Define the class before including platform file, which may specialize
 // the operator definition.  No generic definition of specializations
 // of the operator template are provided, nor are there any generic
@ -437,14 +453,6 @@ inline D Atomic::add_using_helper(Fn fn, I add_value, D volatile* dest) {
       reinterpret_cast<Type volatile*>(dest)));
 }

-inline void Atomic::inc(volatile size_t* dest) {
-  inc_ptr((volatile intptr_t*) dest);
-}
-
-inline void Atomic::dec(volatile size_t* dest) {
-  dec_ptr((volatile intptr_t*) dest);
-}
-
 template<typename T, typename D, typename U>
 inline D Atomic::cmpxchg(T exchange_value,
                         D volatile* dest,
@ -591,12 +599,4 @@ inline unsigned Atomic::xchg(unsigned int exchange_value, volatile unsigned int*
  return (unsigned int)Atomic::xchg((jint)exchange_value, (volatile jint*)dest);
 }

-inline void Atomic::inc(volatile jshort* dest) {
-  (void)add(jshort(1), dest);
-}
-
-inline void Atomic::dec(volatile jshort* dest) {
-  (void)add(jshort(-1), dest);
-}
-
 #endif // SHARE_VM_RUNTIME_ATOMIC_HPP
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@ -2344,12 +2344,6 @@ public:
          range(30*K, max_uintx/BytesPerWord)                               \
          constraint(InitialBootClassLoaderMetaspaceSizeConstraintFunc, AfterErgo)\
                                                                            \
-  product(bool, TraceYoungGenTime, false,                                   \
-          "Trace accumulated time for young collection")                    \
-                                                                            \
-  product(bool, TraceOldGenTime, false,                                     \
-          "Trace accumulated time for old collection")                      \
-                                                                            \
  product(bool, PrintHeapAtSIGBREAK, true,                                  \
          "Print heap layout in response to SIGBREAK")                      \
                                                                            \
--- a/src/hotspot/share/services/heapDumper.cpp
+++ b/src/hotspot/share/services/heapDumper.cpp
@ -856,6 +856,29 @@ void DumperSupport::dump_static_fields(DumpWriter* writer, Klass* k) {
    if (fldc.access_flags().is_static()) field_count++;
  }

+  // Add in resolved_references which is referenced by the cpCache
+  // The resolved_references is an array per InstanceKlass holding the
+  // strings and other oops resolved from the constant pool.
+  oop resolved_references = ik->constants()->resolved_references_or_null();
+  if (resolved_references != NULL) {
+    field_count++;
+
+    // Add in the resolved_references of the used previous versions of the class
+    // in the case of RedefineClasses
+    InstanceKlass* prev = ik->previous_versions();
+    while (prev != NULL && prev->constants()->resolved_references_or_null() != NULL) {
+      field_count++;
+      prev = prev->previous_versions();
+    }
+  }
+
+  // Also provide a pointer to the init_lock if present, so there aren't unreferenced int[0]
+  // arrays.
+  oop init_lock = ik->init_lock();
+  if (init_lock != NULL) {
+    field_count++;
+  }
+
  writer->write_u2(field_count);

  // pass 2 - dump the field descriptors and raw values
@ -873,6 +896,29 @@ void DumperSupport::dump_static_fields(DumpWriter* writer, Klass* k) {
      dump_field_value(writer, sig->byte_at(0), addr);
    }
  }
+
+  // Add resolved_references for each class that has them
+  if (resolved_references != NULL) {
+    writer->write_symbolID(vmSymbols::resolved_references_name());  // name
+    writer->write_u1(sig2tag(vmSymbols::object_array_signature())); // type
+    writer->write_objectID(resolved_references);
+
+    // Also write any previous versions
+    InstanceKlass* prev = ik->previous_versions();
+    while (prev != NULL && prev->constants()->resolved_references_or_null() != NULL) {
+      writer->write_symbolID(vmSymbols::resolved_references_name());  // name
+      writer->write_u1(sig2tag(vmSymbols::object_array_signature())); // type
+      writer->write_objectID(prev->constants()->resolved_references());
+      prev = prev->previous_versions();
+    }
+  }
+
+  // Add init lock to the end if the class is not yet initialized
+  if (init_lock != NULL) {
+    writer->write_symbolID(vmSymbols::init_lock_name());         // name
+    writer->write_u1(sig2tag(vmSymbols::int_array_signature())); // type
+    writer->write_objectID(init_lock);
+  }
 }

 // dump the raw values of the instance fields of the given object
@ -908,7 +954,7 @@ void DumperSupport::dump_instance_field_descriptors(DumpWriter* writer, Klass* k
    if (!fld.access_flags().is_static()) {
      Symbol* sig = fld.signature();

-      writer->write_symbolID(fld.name());                   // name
+      writer->write_symbolID(fld.name());   // name
      writer->write_u1(sig2tag(sig));       // type
    }
  }
@ -1822,6 +1868,8 @@ void VM_HeapDumper::doit() {
  // HPROF_GC_ROOT_JNI_GLOBAL
  JNIGlobalsDumper jni_dumper(writer());
  JNIHandles::oops_do(&jni_dumper);
+  Universe::oops_do(&jni_dumper);  // technically not jni roots, but global roots
+                                   // for things like preallocated throwable backtraces
  check_segment_length();

  // HPROF_GC_ROOT_STICKY_CLASS
--- a/src/hotspot/share/services/memBaseline.cpp
+++ b/src/hotspot/share/services/memBaseline.cpp
@ -144,6 +144,7 @@ class VirtualMemoryAllocationWalker : public VirtualMemoryWalker {
 bool MemBaseline::baseline_summary() {
  MallocMemorySummary::snapshot(&_malloc_memory_snapshot);
  VirtualMemorySummary::snapshot(&_virtual_memory_snapshot);
+  MetaspaceSnapshot::snapshot(_metaspace_snapshot);
  return true;
 }

--- a/src/hotspot/share/services/memBaseline.hpp
+++ b/src/hotspot/share/services/memBaseline.hpp
@ -65,6 +65,7 @@ class MemBaseline VALUE_OBJ_CLASS_SPEC {
  // Summary information
  MallocMemorySnapshot   _malloc_memory_snapshot;
  VirtualMemorySnapshot  _virtual_memory_snapshot;
+  MetaspaceSnapshot      _metaspace_snapshot;

  size_t               _class_count;

@ -103,6 +104,10 @@ class MemBaseline VALUE_OBJ_CLASS_SPEC {
    return &_virtual_memory_snapshot;
  }

+  MetaspaceSnapshot* metaspace_snapshot() {
+    return &_metaspace_snapshot;
+  }
+
  MallocSiteIterator malloc_sites(SortingOrder order);
  VirtualMemorySiteIterator virtual_memory_sites(SortingOrder order);

--- a/src/hotspot/share/services/memReporter.cpp
+++ b/src/hotspot/share/services/memReporter.cpp
@ -175,12 +175,44 @@ void MemSummaryReporter::report_summary_of_type(MEMFLAGS flag,
      amount_in_current_scale(_malloc_snapshot->malloc_overhead()->size()) > 0) {
      out->print_cr("%27s (tracking overhead=" SIZE_FORMAT "%s)", " ",
        amount_in_current_scale(_malloc_snapshot->malloc_overhead()->size()), scale);
+    } else if (flag == mtClass) {
+      // Metadata information
+      report_metadata(Metaspace::NonClassType);
+      if (Metaspace::using_class_space()) {
+        report_metadata(Metaspace::ClassType);
+      }
    }
-
    out->print_cr(" ");
  }
 }

+void MemSummaryReporter::report_metadata(Metaspace::MetadataType type) const {
+  assert(type == Metaspace::NonClassType || type == Metaspace::ClassType,
+    "Invalid metadata type");
+  const char* name = (type == Metaspace::NonClassType) ?
+    "Metadata:   " : "Class space:";
+
+  outputStream* out = output();
+  const char* scale = current_scale();
+  size_t committed   = MetaspaceAux::committed_bytes(type);
+  size_t used = MetaspaceAux::used_bytes(type);
+  size_t free = (MetaspaceAux::capacity_bytes(type) - used)
+              + MetaspaceAux::free_chunks_total_bytes(type)
+              + MetaspaceAux::free_bytes(type);
+
+  assert(committed >= used + free, "Sanity");
+  size_t waste = committed - (used + free);
+
+  out->print_cr("%27s (  %s)", " ", name);
+  out->print("%27s (    ", " ");
+  print_total(MetaspaceAux::reserved_bytes(type), committed);
+  out->print_cr(")");
+  out->print_cr("%27s (    used=" SIZE_FORMAT "%s)", " ", amount_in_current_scale(used), scale);
+  out->print_cr("%27s (    free=" SIZE_FORMAT "%s)", " ", amount_in_current_scale(free), scale);
+  out->print_cr("%27s (    waste=" SIZE_FORMAT "%s =%2.2f%%)", " ", amount_in_current_scale(waste),
+    scale, ((float)waste * 100)/committed);
+}
+
 void MemDetailReporter::report_detail() {
  // Start detail report
  outputStream* out = output();
@ -305,9 +337,13 @@ void MemSummaryDiffReporter::report_diff() {
    MEMFLAGS flag = NMTUtil::index_to_flag(index);
    // thread stack is reported as part of thread category
    if (flag == mtThreadStack) continue;
-    diff_summary_of_type(flag, _early_baseline.malloc_memory(flag),
-      _early_baseline.virtual_memory(flag), _current_baseline.malloc_memory(flag),
-      _current_baseline.virtual_memory(flag));
+    diff_summary_of_type(flag,
+      _early_baseline.malloc_memory(flag),
+      _early_baseline.virtual_memory(flag),
+      _early_baseline.metaspace_snapshot(),
+      _current_baseline.malloc_memory(flag),
+      _current_baseline.virtual_memory(flag),
+      _current_baseline.metaspace_snapshot());
  }
 }

@ -367,9 +403,11 @@ void MemSummaryDiffReporter::print_virtual_memory_diff(size_t current_reserved,
 }


-void MemSummaryDiffReporter::diff_summary_of_type(MEMFLAGS flag, const MallocMemory* early_malloc,
-  const VirtualMemory* early_vm, const MallocMemory* current_malloc,
-  const VirtualMemory* current_vm) const {
+void MemSummaryDiffReporter::diff_summary_of_type(MEMFLAGS flag,
+  const MallocMemory* early_malloc, const VirtualMemory* early_vm,
+  const MetaspaceSnapshot* early_ms,
+  const MallocMemory* current_malloc, const VirtualMemory* current_vm,
+  const MetaspaceSnapshot* current_ms) const {

  outputStream* out = output();
  const char* scale = current_scale();
@ -486,11 +524,77 @@ void MemSummaryDiffReporter::diff_summary_of_type(MEMFLAGS flag, const MallocMem
        out->print(" %+ld%s", overhead_diff, scale);
      }
      out->print_cr(")");
+    } else if (flag == mtClass) {
+      assert(current_ms != NULL && early_ms != NULL, "Sanity");
+      print_metaspace_diff(current_ms, early_ms);
    }
    out->print_cr(" ");
  }
 }

+void MemSummaryDiffReporter::print_metaspace_diff(const MetaspaceSnapshot* current_ms,
+                                                  const MetaspaceSnapshot* early_ms) const {
+  print_metaspace_diff(Metaspace::NonClassType, current_ms, early_ms);
+  if (Metaspace::using_class_space()) {
+    print_metaspace_diff(Metaspace::ClassType, current_ms, early_ms);
+  }
+}
+
+void MemSummaryDiffReporter::print_metaspace_diff(Metaspace::MetadataType type,
+                                                  const MetaspaceSnapshot* current_ms,
+                                                  const MetaspaceSnapshot* early_ms) const {
+  const char* name = (type == Metaspace::NonClassType) ?
+    "Metadata:   " : "Class space:";
+
+  outputStream* out = output();
+  const char* scale = current_scale();
+
+  out->print_cr("%27s (  %s)", " ", name);
+  out->print("%27s (    ", " ");
+  print_virtual_memory_diff(current_ms->reserved_in_bytes(type),
+                            current_ms->committed_in_bytes(type),
+                            early_ms->reserved_in_bytes(type),
+                            early_ms->committed_in_bytes(type));
+  out->print_cr(")");
+
+  long diff_used = diff_in_current_scale(current_ms->used_in_bytes(type),
+                                         early_ms->used_in_bytes(type));
+  long diff_free = diff_in_current_scale(current_ms->free_in_bytes(type),
+                                         early_ms->free_in_bytes(type));
+
+  size_t current_waste = current_ms->committed_in_bytes(type)
+    - (current_ms->used_in_bytes(type) + current_ms->free_in_bytes(type));
+  size_t early_waste = early_ms->committed_in_bytes(type)
+    - (early_ms->used_in_bytes(type) + early_ms->free_in_bytes(type));
+  long diff_waste = diff_in_current_scale(current_waste, early_waste);
+
+  // Diff used
+  out->print("%27s (    used=" SIZE_FORMAT "%s", " ",
+    amount_in_current_scale(current_ms->used_in_bytes(type)), scale);
+  if (diff_used != 0) {
+    out->print(" %+ld%s", diff_used, scale);
+  }
+  out->print_cr(")");
+
+  // Diff free
+  out->print("%27s (    free=" SIZE_FORMAT "%s", " ",
+    amount_in_current_scale(current_ms->free_in_bytes(type)), scale);
+  if (diff_free != 0) {
+    out->print(" %+ld%s", diff_free, scale);
+  }
+  out->print_cr(")");
+
+
+  // Diff waste
+  out->print("%27s (    waste=" SIZE_FORMAT "%s =%2.2f%%", " ",
+    amount_in_current_scale(current_waste), scale,
+    ((float)current_waste * 100) / current_ms->committed_in_bytes(type));
+  if (diff_waste != 0) {
+    out->print(" %+ld%s", diff_waste, scale);
+  }
+  out->print_cr(")");
+}
+
 void MemDetailDiffReporter::report_diff() {
  MemSummaryDiffReporter::report_diff();
  diff_malloc_sites();
--- a/src/hotspot/share/services/memReporter.hpp
+++ b/src/hotspot/share/services/memReporter.hpp
@ -27,6 +27,7 @@

 #if INCLUDE_NMT

+#include "memory/metaspace.hpp"
 #include "oops/instanceKlass.hpp"
 #include "services/memBaseline.hpp"
 #include "services/nmtCommon.hpp"
@ -110,6 +111,8 @@ class MemSummaryReporter : public MemReporterBase {
  // Report summary for each memory type
  void report_summary_of_type(MEMFLAGS type, MallocMemory* malloc_memory,
    VirtualMemory* virtual_memory);
+
+  void report_metadata(Metaspace::MetadataType type) const;
 };

 /*
@ -170,7 +173,9 @@ class MemSummaryDiffReporter : public MemReporterBase {
  // report the comparison of each memory type
  void diff_summary_of_type(MEMFLAGS type,
    const MallocMemory* early_malloc, const VirtualMemory* early_vm,
-    const MallocMemory* current_malloc, const VirtualMemory* current_vm) const;
+    const MetaspaceSnapshot* early_ms,
+    const MallocMemory* current_malloc, const VirtualMemory* current_vm,
+    const MetaspaceSnapshot* current_ms) const;

 protected:
  void print_malloc_diff(size_t current_amount, size_t current_count,
@ -179,6 +184,11 @@ class MemSummaryDiffReporter : public MemReporterBase {
    size_t early_reserved, size_t early_committed) const;
  void print_arena_diff(size_t current_amount, size_t current_count,
    size_t early_amount, size_t early_count) const;
+
+  void print_metaspace_diff(const MetaspaceSnapshot* current_ms,
+                            const MetaspaceSnapshot* early_ms) const;
+  void print_metaspace_diff(Metaspace::MetadataType type,
+    const MetaspaceSnapshot* current_ms, const MetaspaceSnapshot* early_ms) const;
 };

 /*
--- a/src/hotspot/share/services/virtualMemoryTracker.cpp
+++ b/src/hotspot/share/services/virtualMemoryTracker.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -23,6 +23,7 @@
 */
 #include "precompiled.hpp"

+#include "memory/metaspace.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "runtime/threadCritical.hpp"
@ -492,3 +493,35 @@ bool VirtualMemoryTracker::transition(NMT_TrackingLevel from, NMT_TrackingLevel

  return true;
 }
+
+// Metaspace Support
+MetaspaceSnapshot::MetaspaceSnapshot() {
+  for (int index = (int)Metaspace::ClassType; index < (int)Metaspace::MetadataTypeCount; index ++) {
+    Metaspace::MetadataType type = (Metaspace::MetadataType)index;
+    assert_valid_metadata_type(type);
+    _reserved_in_bytes[type]  = 0;
+    _committed_in_bytes[type] = 0;
+    _used_in_bytes[type]      = 0;
+    _free_in_bytes[type]      = 0;
+  }
+}
+
+void MetaspaceSnapshot::snapshot(Metaspace::MetadataType type, MetaspaceSnapshot& mss) {
+  assert_valid_metadata_type(type);
+
+  mss._reserved_in_bytes[type]   = MetaspaceAux::reserved_bytes(type);
+  mss._committed_in_bytes[type]  = MetaspaceAux::committed_bytes(type);
+  mss._used_in_bytes[type]       = MetaspaceAux::used_bytes(type);
+
+  size_t free_in_bytes = (MetaspaceAux::capacity_bytes(type) - MetaspaceAux::used_bytes(type))
+                       + MetaspaceAux::free_chunks_total_bytes(type)
+                       + MetaspaceAux::free_bytes(type);
+  mss._free_in_bytes[type] = free_in_bytes;
+}
+
+void MetaspaceSnapshot::snapshot(MetaspaceSnapshot& mss) {
+  snapshot(Metaspace::ClassType, mss);
+  if (Metaspace::using_class_space()) {
+    snapshot(Metaspace::NonClassType, mss);
+  }
+}
--- a/src/hotspot/share/services/virtualMemoryTracker.hpp
+++ b/src/hotspot/share/services/virtualMemoryTracker.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -28,6 +28,7 @@
 #if INCLUDE_NMT

 #include "memory/allocation.hpp"
+#include "memory/metaspace.hpp"
 #include "services/allocationSite.hpp"
 #include "services/nmtCommon.hpp"
 #include "utilities/linkedlist.hpp"
@ -419,6 +420,31 @@ class VirtualMemoryTracker : AllStatic {
 };


+class MetaspaceSnapshot : public ResourceObj {
+private:
+  size_t  _reserved_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _committed_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _used_in_bytes[Metaspace::MetadataTypeCount];
+  size_t  _free_in_bytes[Metaspace::MetadataTypeCount];
+
+public:
+  MetaspaceSnapshot();
+  size_t reserved_in_bytes(Metaspace::MetadataType type)   const { assert_valid_metadata_type(type); return _reserved_in_bytes[type]; }
+  size_t committed_in_bytes(Metaspace::MetadataType type)  const { assert_valid_metadata_type(type); return _committed_in_bytes[type]; }
+  size_t used_in_bytes(Metaspace::MetadataType type)       const { assert_valid_metadata_type(type); return _used_in_bytes[type]; }
+  size_t free_in_bytes(Metaspace::MetadataType type)       const { assert_valid_metadata_type(type); return _free_in_bytes[type]; }
+
+  static void snapshot(MetaspaceSnapshot& s);
+
+private:
+  static void snapshot(Metaspace::MetadataType type, MetaspaceSnapshot& s);
+
+  static void assert_valid_metadata_type(Metaspace::MetadataType type) {
+    assert(type == Metaspace::ClassType || type == Metaspace::NonClassType,
+      "Invalid metadata type");
+  }
+};
+
 #endif // INCLUDE_NMT

 #endif // SHARE_VM_SERVICES_VIRTUAL_MEMORY_TRACKER_HPP
--- a/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
+++ b/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
@ -1,4 +0,0 @@
-BOOT
-@@BOOT_MODULE_NAMES@@
-PLATFORM
-@@PLATFORM_MODULE_NAMES@@
--- a/src/java.management/share/classes/module-info.java
+++ b/src/java.management/share/classes/module-info.java
@ -64,7 +64,8 @@ module java.management {
    exports sun.management.counter.perf to
        jdk.management.agent;
    exports sun.management.spi to
-        jdk.management;
+        jdk.management,
+        jdk.internal.vm.compiler.management;

    uses javax.management.remote.JMXConnectorProvider;
    uses javax.management.remote.JMXConnectorServerProvider;
--- a/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java
+++ b/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -32,6 +32,10 @@ import com.sun.tools.attach.spi.AttachProvider;
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.Files;

 /*
 * Linux implementation of HotSpotVirtualMachine
@ -63,12 +67,15 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
            throw new AttachNotSupportedException("Invalid process identifier");
        }

+        // Try to resolve to the "inner most" pid namespace
+        int ns_pid = getNamespacePid(pid);
+
        // Find the socket file. If not found then we attempt to start the
        // attach mechanism in the target VM by sending it a QUIT signal.
        // Then we attempt to find the socket file again.
-        path = findSocketFile(pid);
+        path = findSocketFile(pid, ns_pid);
        if (path == null) {
-            File f = createAttachFile(pid);
+            File f = createAttachFile(pid, ns_pid);
            try {
                sendQuitTo(pid);

@ -83,7 +90,7 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
                    try {
                        Thread.sleep(delay);
                    } catch (InterruptedException x) { }
-                    path = findSocketFile(pid);
+                    path = findSocketFile(pid, ns_pid);

                    time_spend += delay;
                    if (time_spend > timeout/2 && path == null) {
@ -262,8 +269,12 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
    }

    // Return the socket file for the given process.
-    private String findSocketFile(int pid) {
-        File f = new File(tmpdir, ".java_pid" + pid);
+    private String findSocketFile(int pid, int ns_pid) {
+        // A process may not exist in the same mount namespace as the caller.
+        // Instead, attach relative to the target root filesystem as exposed by
+        // procfs regardless of namespaces.
+        String root = "/proc/" + pid + "/root/" + tmpdir;
+        File f = new File(root, ".java_pid" + ns_pid);
        if (!f.exists()) {
            return null;
        }
@ -274,14 +285,23 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
    // if not already started. The client creates a .attach_pid<pid> file in the
    // target VM's working directory (or temp directory), and the SIGQUIT handler
    // checks for the file.
-    private File createAttachFile(int pid) throws IOException {
-        String fn = ".attach_pid" + pid;
+    private File createAttachFile(int pid, int ns_pid) throws IOException {
+        String fn = ".attach_pid" + ns_pid;
        String path = "/proc/" + pid + "/cwd/" + fn;
        File f = new File(path);
        try {
            f.createNewFile();
        } catch (IOException x) {
-            f = new File(tmpdir, fn);
+            String root;
+            if (pid != ns_pid) {
+                // A process may not exist in the same mount namespace as the caller.
+                // Instead, attach relative to the target root filesystem as exposed by
+                // procfs regardless of namespaces.
+                root = "/proc/" + pid + "/root/" + tmpdir;
+            } else {
+                root = tmpdir;
+            }
+            f = new File(root, fn);
            f.createNewFile();
        }
        return f;
@ -307,6 +327,40 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
    }


+    // Return the inner most namespaced PID if there is one,
+    // otherwise return the original PID.
+    private int getNamespacePid(int pid) throws AttachNotSupportedException, IOException {
+        // Assuming a real procfs sits beneath, reading this doesn't block
+        // nor will it consume a lot of memory.
+        String statusFile = "/proc/" + pid + "/status";
+        File f = new File(statusFile);
+        if (!f.exists()) {
+            return pid; // Likely a bad pid, but this is properly handled later.
+        }
+
+        Path statusPath = Paths.get(statusFile);
+
+        try {
+            for (String line : Files.readAllLines(statusPath, StandardCharsets.UTF_8)) {
+                String[] parts = line.split(":");
+                if (parts.length == 2 && parts[0].trim().equals("NSpid")) {
+                    parts = parts[1].trim().split("\\s+");
+                    // The last entry represents the PID the JVM "thinks" it is.
+                    // Even in non-namespaced pids these entries should be
+                    // valid. You could refer to it as the inner most pid.
+                    int ns_pid = Integer.parseInt(parts[parts.length - 1]);
+                    return ns_pid;
+                }
+            }
+            // Old kernels may not have NSpid field (i.e. 3.10).
+            // Fallback to original pid in the event we cannot deduce.
+            return pid;
+        } catch (NumberFormatException | IOException x) {
+            throw new AttachNotSupportedException("Unable to parse namespace");
+        }
+    }
+
+
    //-- native methods

    static native void sendQuitToChildrenOf(int pid) throws IOException;
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/NMethod.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/code/NMethod.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -71,7 +71,7 @@ public class NMethod extends CompiledMethod {
      stack.  An not_entrant method can be removed when there is no
      more activations, i.e., when the _stack_traversal_mark is less than
      current sweep traversal index. */
-  private static JLongField stackTraversalMarkField;
+  private static CIntegerField stackTraversalMarkField;

  private static CIntegerField compLevelField;

@ -105,7 +105,7 @@ public class NMethod extends CompiledMethod {
    verifiedEntryPointField     = type.getAddressField("_verified_entry_point");
    osrEntryPointField          = type.getAddressField("_osr_entry_point");
    lockCountField              = type.getJIntField("_lock_count");
-    stackTraversalMarkField     = type.getJLongField("_stack_traversal_mark");
+    stackTraversalMarkField     = type.getCIntegerField("_stack_traversal_mark");
    compLevelField              = type.getCIntegerField("_comp_level");
    pcDescSize = db.lookupType("PcDesc").getSize();
  }
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java
@ -79,9 +79,15 @@ public class SPARCHotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
        if ((config.vmVersionFeatures & 1L << config.sparc_DES) != 0) {
            features.add(CPUFeature.DES);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_DICTUNP) != 0) {
+            features.add(CPUFeature.DICTUNP);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_FMAF) != 0) {
            features.add(CPUFeature.FMAF);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_FPCMPSHL) != 0) {
+            features.add(CPUFeature.FPCMPSHL);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_HPC) != 0) {
            features.add(CPUFeature.HPC);
        }
@ -94,6 +100,9 @@ public class SPARCHotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
        if ((config.vmVersionFeatures & 1L << config.sparc_MD5) != 0) {
            features.add(CPUFeature.MD5);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_MME) != 0) {
+            features.add(CPUFeature.MME);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_MONT) != 0) {
            features.add(CPUFeature.MONT);
        }
@ -112,18 +121,30 @@ public class SPARCHotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
        if ((config.vmVersionFeatures & 1L << config.sparc_POPC) != 0) {
            features.add(CPUFeature.POPC);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_RLE) != 0) {
+            features.add(CPUFeature.RLE);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_SHA1) != 0) {
            features.add(CPUFeature.SHA1);
        }
        if ((config.vmVersionFeatures & 1L << config.sparc_SHA256) != 0) {
            features.add(CPUFeature.SHA256);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SHA3) != 0) {
+            features.add(CPUFeature.SHA3);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_SHA512) != 0) {
            features.add(CPUFeature.SHA512);
        }
        if ((config.vmVersionFeatures & 1L << config.sparc_SPARC5) != 0) {
            features.add(CPUFeature.SPARC5);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SPARC5B) != 0) {
+            features.add(CPUFeature.SPARC5B);
+        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_SPARC6) != 0) {
+            features.add(CPUFeature.SPARC6);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_V9) != 0) {
            features.add(CPUFeature.V9);
        }
@ -142,6 +163,9 @@ public class SPARCHotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFacto
        if ((config.vmVersionFeatures & 1L << config.sparc_VIS3B) != 0) {
            features.add(CPUFeature.VIS3B);
        }
+        if ((config.vmVersionFeatures & 1L << config.sparc_VIS3C) != 0) {
+            features.add(CPUFeature.VIS3C);
+        }
        if ((config.vmVersionFeatures & 1L << config.sparc_XMONT) != 0) {
            features.add(CPUFeature.XMONT);
        }
--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotVMConfig.java
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotVMConfig.java
@ -55,27 +55,35 @@ class SPARCHotSpotVMConfig extends HotSpotVMConfigAccess {
    final int sparc_CBCOND   = getConstant("VM_Version::ISA_CBCOND",   Integer.class);
    final int sparc_CRC32C   = getConstant("VM_Version::ISA_CRC32C",   Integer.class);
    final int sparc_DES      = getConstant("VM_Version::ISA_DES",      Integer.class);
+    final int sparc_DICTUNP  = getConstant("VM_Version::ISA_DICTUNP",  Integer.class);
    final int sparc_FMAF     = getConstant("VM_Version::ISA_FMAF",     Integer.class);
+    final int sparc_FPCMPSHL = getConstant("VM_Version::ISA_FPCMPSHL", Integer.class);
    final int sparc_HPC      = getConstant("VM_Version::ISA_HPC",      Integer.class);
    final int sparc_IMA      = getConstant("VM_Version::ISA_IMA",      Integer.class);
    final int sparc_KASUMI   = getConstant("VM_Version::ISA_KASUMI",   Integer.class);
    final int sparc_MD5      = getConstant("VM_Version::ISA_MD5",      Integer.class);
+    final int sparc_MME      = getConstant("VM_Version::ISA_MME",      Integer.class);
    final int sparc_MONT     = getConstant("VM_Version::ISA_MONT",     Integer.class);
    final int sparc_MPMUL    = getConstant("VM_Version::ISA_MPMUL",    Integer.class);
    final int sparc_MWAIT    = getConstant("VM_Version::ISA_MWAIT",    Integer.class);
    final int sparc_PAUSE    = getConstant("VM_Version::ISA_PAUSE",    Integer.class);
    final int sparc_PAUSE_NSEC = getConstant("VM_Version::ISA_PAUSE_NSEC", Integer.class);
    final int sparc_POPC     = getConstant("VM_Version::ISA_POPC",     Integer.class);
+    final int sparc_RLE      = getConstant("VM_Version::ISA_RLE",      Integer.class);
    final int sparc_SHA1     = getConstant("VM_Version::ISA_SHA1",     Integer.class);
    final int sparc_SHA256   = getConstant("VM_Version::ISA_SHA256",   Integer.class);
+    final int sparc_SHA3     = getConstant("VM_Version::ISA_SHA3",     Integer.class);
    final int sparc_SHA512   = getConstant("VM_Version::ISA_SHA512",   Integer.class);
    final int sparc_SPARC5   = getConstant("VM_Version::ISA_SPARC5",   Integer.class);
+    final int sparc_SPARC5B  = getConstant("VM_Version::ISA_SPARC5B",  Integer.class);
+    final int sparc_SPARC6   = getConstant("VM_Version::ISA_SPARC6",   Integer.class);
    final int sparc_V9       = getConstant("VM_Version::ISA_V9",       Integer.class);
    final int sparc_VAMASK   = getConstant("VM_Version::ISA_VAMASK",   Integer.class);
    final int sparc_VIS1     = getConstant("VM_Version::ISA_VIS1",     Integer.class);
    final int sparc_VIS2     = getConstant("VM_Version::ISA_VIS2",     Integer.class);
    final int sparc_VIS3     = getConstant("VM_Version::ISA_VIS3",     Integer.class);
    final int sparc_VIS3B    = getConstant("VM_Version::ISA_VIS3B",    Integer.class);
+    final int sparc_VIS3C    = getConstant("VM_Version::ISA_VIS3C",    Integer.class);
    final int sparc_XMONT    = getConstant("VM_Version::ISA_XMONT",    Integer.class);
    final int sparc_XMPMUL   = getConstant("VM_Version::ISA_XMPMUL",   Integer.class);

--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sparc/src/jdk/vm/ci/sparc/SPARC.java
+++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sparc/src/jdk/vm/ci/sparc/SPARC.java
@ -344,27 +344,35 @@ public class SPARC extends Architecture {
        CBCOND,
        CRC32C,
        DES,
+        DICTUNP,
        FMAF,
+        FPCMPSHL,
        HPC,
        IMA,
        KASUMI,
        MD5,
+        MME,
        MONT,
        MPMUL,
        MWAIT,
        PAUSE,
        PAUSE_NSEC,
        POPC,
+        RLE,
        SHA1,
        SHA256,
+        SHA3,
        SHA512,
        SPARC5,
+        SPARC5B,
+        SPARC6,
        V9,
        VAMASK,
        VIS1,
        VIS2,
        VIS3,
        VIS3B,
+        VIS3C,
        XMONT,
        XMPMUL,
        // Synthesised CPU properties:
--- a/src/jdk.internal.vm.ci/share/classes/module-info.java
+++ b/src/jdk.internal.vm.ci/share/classes/module-info.java
@ -25,6 +25,9 @@

 module jdk.internal.vm.ci {
    exports jdk.vm.ci.services to jdk.internal.vm.compiler;
+    exports jdk.vm.ci.runtime to
+        jdk.internal.vm.compiler,
+        jdk.internal.vm.compiler.management;

    uses jdk.vm.ci.services.JVMCIServiceLocator;
    uses jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
--- a/src/jdk.internal.vm.compiler.management/share/classes/module-info.java
+++ b/src/jdk.internal.vm.compiler.management/share/classes/module-info.java
@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Registers Graal Compiler specific management interfaces for the JVM.
+ *
+ * @moduleGraph
+ * @since 10
+ */
+module jdk.internal.vm.compiler.management {
+    requires java.management;
+    requires jdk.management;
+    requires jdk.internal.vm.ci;
+    requires jdk.internal.vm.compiler;
+
+    provides sun.management.spi.PlatformMBeanProvider with
+        org.graalvm.compiler.hotspot.jmx.GraalMBeans;
+}
+
--- a/src/jdk.internal.vm.compiler.management/share/classes/org/graalvm/compiler/hotspot/jmx/GraalMBeans.java
+++ b/src/jdk.internal.vm.compiler.management/share/classes/org/graalvm/compiler/hotspot/jmx/GraalMBeans.java
@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.hotspot.jmx;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import jdk.vm.ci.runtime.JVMCI;
+import jdk.vm.ci.runtime.JVMCICompiler;
+import jdk.vm.ci.runtime.JVMCIRuntime;
+import org.graalvm.compiler.hotspot.HotSpotGraalCompiler;
+import sun.management.spi.PlatformMBeanProvider;
+import sun.management.spi.PlatformMBeanProvider.PlatformComponent;
+
+public final class GraalMBeans extends PlatformMBeanProvider {
+    @Override
+    public List<PlatformComponent<?>> getPlatformComponentList() {
+        List<PlatformComponent<?>> components = new ArrayList<>();
+        try {
+            Object bean = findGraalRuntimeBean();
+            if (bean != null) {
+                components.add(new HotSpotRuntimeMBeanComponent(bean));
+            }
+        } catch (InternalError | LinkageError err) {
+            // go on and ignore
+        }
+        return components;
+    }
+
+    public static Object findGraalRuntimeBean() {
+        JVMCIRuntime r = JVMCI.getRuntime();
+        JVMCICompiler c = r.getCompiler();
+        if (c instanceof HotSpotGraalCompiler) {
+            return ((HotSpotGraalCompiler) c).mbean();
+        }
+        return null;
+    }
+
+    private static final class HotSpotRuntimeMBeanComponent implements PlatformComponent<Object> {
+
+        private final String name;
+        private final Object mbean;
+
+        HotSpotRuntimeMBeanComponent(Object mbean) {
+            this.name = "org.graalvm.compiler.hotspot:type=Options";
+            this.mbean = mbean;
+        }
+
+        @Override
+        public Set<Class<?>> mbeanInterfaces() {
+            return Collections.emptySet();
+        }
+
+        @Override
+        public Set<String> mbeanInterfaceNames() {
+            return Collections.emptySet();
+        }
+
+        @Override
+        public String getObjectNamePattern() {
+            return name;
+        }
+
+        @Override
+        public Map<String, Object> nameToMBeanMap() {
+            return Collections.<String, Object>singletonMap(name, mbean);
+        }
+    }
+}
--- a/src/jdk.internal.vm.compiler/share/classes/module-info.java
+++ b/src/jdk.internal.vm.compiler/share/classes/module-info.java
@ -50,7 +50,9 @@ module jdk.internal.vm.compiler {
    exports org.graalvm.compiler.core.target            to jdk.aot;
    exports org.graalvm.compiler.debug                  to jdk.aot;
    exports org.graalvm.compiler.graph                  to jdk.aot;
-    exports org.graalvm.compiler.hotspot                to jdk.aot;
+    exports org.graalvm.compiler.hotspot                to
+        jdk.aot,
+        jdk.internal.vm.compiler.management;
    exports org.graalvm.compiler.hotspot.meta           to jdk.aot;
    exports org.graalvm.compiler.hotspot.replacements   to jdk.aot;
    exports org.graalvm.compiler.hotspot.stubs          to jdk.aot;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalCompiler.java
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/HotSpotGraalCompiler.java
@ -282,6 +282,13 @@ public class HotSpotGraalCompiler implements GraalJVMCICompiler {
        return suite;
    }

+    public Object mbean() {
+        if (graalRuntime instanceof HotSpotGraalRuntime) {
+            return ((HotSpotGraalRuntime)graalRuntime).mbean();
+        }
+        return null;
+    }
+
    /**
     * Converts {@code method} to a String with {@link JavaMethod#format(String)} and the format
     * string {@code "%H.%n(%p)"}.
--- a/Show More
+++ b/Show More