Alejandro Murillo 2015-10-19 12:30:17 -07:00
commit cfbb592424
941 changed files with 66316 additions and 6373 deletions

View File

@ -67,9 +67,6 @@ public class ImmutableOopMapSet extends VMObject {
}
}
public void visitValueLocation(Address valueAddr) {
}
public void visitNarrowOopLocation(Address narrowOopAddr) {
addressVisitor.visitCompOopAddress(narrowOopAddr);
}
@ -216,9 +213,9 @@ public class ImmutableOopMapSet extends VMObject {
}
}
// We want narrow oop, value and oop oop_types
OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[]{
OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
// We want narrow oop and oop oop_types
OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[] {
OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
};
{
@ -231,8 +228,6 @@ public class ImmutableOopMapSet extends VMObject {
// to detect in the debugging system
// assert(Universe::is_heap_or_null(*loc), "found non oop pointer");
visitor.visitOopLocation(loc);
} else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) {
visitor.visitValueLocation(loc);
} else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
visitor.visitNarrowOopLocation(loc);
}

View File

@ -49,7 +49,6 @@ public class OopMapValue {
// Types of OopValues
static int UNUSED_VALUE;
static int OOP_VALUE;
static int VALUE_VALUE;
static int NARROWOOP_VALUE;
static int CALLEE_SAVED_VALUE;
static int DERIVED_OOP_VALUE;
@ -73,7 +72,6 @@ public class OopMapValue {
REGISTER_MASK_IN_PLACE = db.lookupIntConstant("OopMapValue::register_mask_in_place").intValue();
UNUSED_VALUE = db.lookupIntConstant("OopMapValue::unused_value").intValue();
OOP_VALUE = db.lookupIntConstant("OopMapValue::oop_value").intValue();
VALUE_VALUE = db.lookupIntConstant("OopMapValue::value_value").intValue();
NARROWOOP_VALUE = db.lookupIntConstant("OopMapValue::narrowoop_value").intValue();
CALLEE_SAVED_VALUE = db.lookupIntConstant("OopMapValue::callee_saved_value").intValue();
DERIVED_OOP_VALUE = db.lookupIntConstant("OopMapValue::derived_oop_value").intValue();
@ -82,7 +80,6 @@ public class OopMapValue {
public static abstract class OopTypes {
public static final OopTypes UNUSED_VALUE = new OopTypes() { int getValue() { return OopMapValue.UNUSED_VALUE; }};
public static final OopTypes OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.OOP_VALUE; }};
public static final OopTypes VALUE_VALUE = new OopTypes() { int getValue() { return OopMapValue.VALUE_VALUE; }};
public static final OopTypes NARROWOOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.NARROWOOP_VALUE; }};
public static final OopTypes CALLEE_SAVED_VALUE = new OopTypes() { int getValue() { return OopMapValue.CALLEE_SAVED_VALUE; }};
public static final OopTypes DERIVED_OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.DERIVED_OOP_VALUE; }};
@ -105,7 +102,6 @@ public class OopMapValue {
// Querying
public boolean isOop() { return (getValue() & TYPE_MASK_IN_PLACE) == OOP_VALUE; }
public boolean isValue() { return (getValue() & TYPE_MASK_IN_PLACE) == VALUE_VALUE; }
public boolean isNarrowOop() { return (getValue() & TYPE_MASK_IN_PLACE) == NARROWOOP_VALUE; }
public boolean isCalleeSaved() { return (getValue() & TYPE_MASK_IN_PLACE) == CALLEE_SAVED_VALUE; }
public boolean isDerivedOop() { return (getValue() & TYPE_MASK_IN_PLACE) == DERIVED_OOP_VALUE; }
@ -117,7 +113,6 @@ public class OopMapValue {
int which = (getValue() & TYPE_MASK_IN_PLACE);
if (which == UNUSED_VALUE) return OopTypes.UNUSED_VALUE;
else if (which == OOP_VALUE) return OopTypes.OOP_VALUE;
else if (which == VALUE_VALUE) return OopTypes.VALUE_VALUE;
else if (which == NARROWOOP_VALUE) return OopTypes.NARROWOOP_VALUE;
else if (which == CALLEE_SAVED_VALUE) return OopTypes.CALLEE_SAVED_VALUE;
else if (which == DERIVED_OOP_VALUE) return OopTypes.DERIVED_OOP_VALUE;

View File

@ -31,6 +31,5 @@ import sun.jvm.hotspot.debugger.*;
public interface OopMapVisitor {
public void visitOopLocation(Address oopAddr);
public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr);
public void visitValueLocation(Address valueAddr);
public void visitNarrowOopLocation(Address narrowOopAddr);
}
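The hunks above remove value locations from the serviceability agent's oop-map traversal: ImmutableOopMapSet no longer dispatches on VALUE_VALUE, and OopMapVisitor loses visitValueLocation, leaving plain oops, derived oops and narrow (compressed) oops. A minimal sketch of a visitor written against the trimmed interface, assuming the sun.jvm.hotspot.compiler package and using a hypothetical CountingOopMapVisitor name:

import sun.jvm.hotspot.compiler.OopMapVisitor;
import sun.jvm.hotspot.debugger.Address;

// Counts the oop locations reported while walking a frame's oop map.
public class CountingOopMapVisitor implements OopMapVisitor {
    private int oops, narrowOops, derivedOops;

    public void visitOopLocation(Address oopAddr)             { oops++; }
    public void visitNarrowOopLocation(Address narrowOopAddr) { narrowOops++; }
    public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr) { derivedOops++; }

    public int total() { return oops + narrowOops + derivedOops; }
}

Callers that previously also implemented visitValueLocation simply drop that method, as the Frame hunk later in this commit does.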

View File

@ -536,9 +536,6 @@ public abstract class Frame implements Cloneable {
}
}
public void visitValueLocation(Address valueAddr) {
}
public void visitNarrowOopLocation(Address compOopAddr) {
addressVisitor.visitCompOopAddress(compOopAddr);
}

View File

@ -1220,9 +1220,6 @@ public class HTMLGenerator implements /* imports */ ClassConstants {
oms = new OopMapStream(map, OopMapValue.OopTypes.NARROWOOP_VALUE);
buf.append(omvIterator.iterate(oms, "NarrowOops:", false));
oms = new OopMapStream(map, OopMapValue.OopTypes.VALUE_VALUE);
buf.append(omvIterator.iterate(oms, "Values:", false));
oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE);
buf.append(omvIterator.iterate(oms, "Callee saved:", true));

View File

@ -28,4 +28,7 @@ TYPE=COMPILER1
VM_SUBDIR = client
# We don't support the JVMCI in a client VM.
INCLUDE_JVMCI := false
CFLAGS += -DCOMPILER1

View File

@ -149,6 +149,7 @@ ifeq ($(USE_CLANG), true)
PCH_FLAG/sharedRuntimeTrig.o = $(PCH_FLAG/NO_PCH)
PCH_FLAG/sharedRuntimeTrans.o = $(PCH_FLAG/NO_PCH)
PCH_FLAG/unsafe.o = $(PCH_FLAG/NO_PCH)
PCH_FLAG/jvmciCompilerToVM.o = $(PCH_FLAG/NO_PCH)
endif
else # ($(USE_CLANG), true)
@ -313,10 +314,11 @@ endif
# Work around some compiler bugs.
ifeq ($(USE_CLANG), true)
# Clang <= 6.1
# Clang < 6 | <= 6.1 | <= 7.0
ifeq ($(shell expr \
$(CC_VER_MAJOR) \< 6 \| \
\( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \
\( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \| \
\( $(CC_VER_MAJOR) = 7 \& $(CC_VER_MINOR) \<= 0 \) \
), 1)
OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
OPT_CFLAGS/unsafe.o += -O1

View File

@ -62,7 +62,7 @@ endif
$(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
@echo $(LOG_INFO) Making signal interposition lib...
$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
$(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $<
$(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $<
ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
ifeq ($(OS_VENDOR), Darwin)
$(DSYMUTIL) $@

View File

@ -38,6 +38,7 @@ INCLUDE_ALL_GCS := false
INCLUDE_NMT := false
INCLUDE_TRACE := false
INCLUDE_CDS := false
INCLUDE_JVMCI := false
CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"

View File

@ -106,6 +106,25 @@ ifeq ($(INCLUDE_NMT), false)
memTracker.cpp nmtDCmd.cpp mallocSiteTable.cpp
endif
ifneq (,$(findstring $(Platform_arch_model), x86_64, sparc))
# JVMCI is supported only on x86_64 and SPARC.
else
INCLUDE_JVMCI := false
endif
ifeq ($(INCLUDE_JVMCI), false)
CXXFLAGS += -DINCLUDE_JVMCI=0
CFLAGS += -DINCLUDE_JVMCI=0
jvmci_dir := $(HS_COMMON_SRC)/share/vm/jvmci
jvmci_dir_alt := $(HS_ALT_SRC)/share/vm/jvmci
jvmci_exclude := $(notdir $(wildcard $(jvmci_dir)/*.cpp)) \
$(notdir $(wildcard $(jvmci_dir_alt)/*.cpp))
Src_Files_EXCLUDE += $(jvmci_exclude) \
jvmciCodeInstaller_aarch64.cpp jvmciCodeInstaller_ppc.cpp jvmciCodeInstaller_sparc.cpp \
jvmciCodeInstaller_x86.cpp
endif
-include $(HS_ALT_MAKE)/excludeSrc.make
.PHONY: $(HS_ALT_MAKE)/excludeSrc.make

View File

@ -0,0 +1,121 @@
#
# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Oracle designates this
# particular file as subject to the "Classpath" exception as provided
# by Oracle in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
default: all
include $(SPEC)
include MakeBase.gmk
include JavaCompilation.gmk
include SetupJavaCompilers.gmk
GENSRC_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.vm.ci
SRC_DIR := $(HOTSPOT_TOPDIR)/src/jdk.vm.ci/share/classes
################################################################################
# Compile the annotation processor
$(eval $(call SetupJavaCompilation, BUILD_JVMCI_OPTIONS, \
SETUP := GENERATE_OLDBYTECODE, \
SRC := $(SRC_DIR)/jdk.vm.ci.options/src \
$(SRC_DIR)/jdk.vm.ci.options.processor/src \
$(SRC_DIR)/jdk.vm.ci.inittimer/src, \
BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_options, \
JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar, \
))
$(eval $(call SetupJavaCompilation, BUILD_JVMCI_SERVICE, \
SETUP := GENERATE_OLDBYTECODE, \
SRC := $(SRC_DIR)/jdk.vm.ci.service/src \
$(SRC_DIR)/jdk.vm.ci.service.processor/src, \
BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_service, \
JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar, \
))
################################################################################
PROC_SRC_SUBDIRS := \
jdk.vm.ci.compiler \
jdk.vm.ci.hotspot \
jdk.vm.ci.hotspot.amd64 \
jdk.vm.ci.hotspot.sparc \
#
PROC_SRC_DIRS := $(patsubst %, $(SRC_DIR)/%/src, $(PROC_SRC_SUBDIRS))
PROC_SRCS := $(filter %.java, $(call CacheFind, $(PROC_SRC_DIRS)))
ALL_SRC_DIRS := $(wildcard $(SRC_DIR)/*/src)
SOURCEPATH := $(call PathList, $(ALL_SRC_DIRS))
PROCESSOR_PATH := $(call PathList, \
$(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar \
$(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar)
$(GENSRC_DIR)/_gensrc_proc_done: $(PROC_SRCS) \
$(BUILD_JVMCI_OPTIONS) $(BUILD_JVMCI_SERVICE)
$(MKDIR) -p $(@D)
$(eval $(call ListPathsSafely,PROC_SRCS,$(@D)/_gensrc_proc_files))
$(JAVA_SMALL) $(NEW_JAVAC) \
-sourcepath $(SOURCEPATH) \
-implicit:none \
-proc:only \
-processorpath $(PROCESSOR_PATH) \
-d $(GENSRC_DIR) \
-s $(GENSRC_DIR) \
@$(@D)/_gensrc_proc_files
$(TOUCH) $@
TARGETS += $(GENSRC_DIR)/_gensrc_proc_done
################################################################################
$(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors: \
$(GENSRC_DIR)/_gensrc_proc_done
$(MKDIR) -p $(@D)
($(CD) $(GENSRC_DIR)/META-INF/jvmci.options && \
$(RM) -f $@; \
for i in $$(ls); do \
echo $${i}_OptionDescriptors >> $@; \
done)
TARGETS += $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors
################################################################################
$(GENSRC_DIR)/_providers_converted: $(GENSRC_DIR)/_gensrc_proc_done
$(MKDIR) -p $(GENSRC_DIR)/META-INF/services
($(CD) $(GENSRC_DIR)/META-INF/jvmci.providers && \
for i in $$($(LS)); do \
c=$$($(CAT) $$i | $(TR) -d '\n\r'); \
$(ECHO) $$i >> $(GENSRC_DIR)/META-INF/services/$$c; \
done)
$(TOUCH) $@
TARGETS += $(GENSRC_DIR)/_providers_converted
################################################################################
all: $(TARGETS)
.PHONY: default all

View File

@ -28,4 +28,7 @@ TYPE=COMPILER1
VM_SUBDIR = client
# We don't support the JVMCI in a client VM.
INCLUDE_JVMCI := false
CFLAGS += -DCOMPILER1

View File

@ -213,12 +213,16 @@ ifeq ($(USE_CLANG),)
# Since GCC 4.3, -Wconversion has changed its meanings to warn these implicit
# conversions which might affect the values. Only enable it in earlier versions.
ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
# GCC < 4.3
WARNING_FLAGS += -Wconversion
endif
ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 8 \) \))" "1"
# GCC >= 4.8
# This flag is only known since GCC 4.3. Gcc 4.8 contains a fix so that with templates no
# warnings are issued: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11856
WARNING_FLAGS += -Wtype-limits
# GCC < 4.8 doesn't accept this flag for C++.
WARNING_FLAGS += -Wno-format-zero-length
endif
endif

View File

@ -38,6 +38,7 @@ INCLUDE_ALL_GCS := false
INCLUDE_NMT := false
INCLUDE_TRACE := false
INCLUDE_CDS := false
INCLUDE_JVMCI := false
CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"

View File

@ -28,4 +28,7 @@ TYPE=COMPILER1
VM_SUBDIR = client
# We don't support the JVMCI in a client VM.
INCLUDE_JVMCI := false
CFLAGS += -DCOMPILER1

View File

@ -52,6 +52,7 @@ UNIQ="$MKS_HOME/uniq.exe"
CAT="$MKS_HOME/cat.exe"
RM="$MKS_HOME/rm.exe"
DUMPBIN="link.exe /dump"
export VS_UNICODE_OUTPUT=
if [ "$1" = "-nosa" ]; then
echo EXPORTS > vm.def

View File

@ -111,6 +111,7 @@ esac
COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp c2_* runtime_*"
COMPILER1_SPECIFIC_FILES="c1_*"
JVMCI_SPECIFIC_FILES="*jvmci* *JVMCI*"
SHARK_SPECIFIC_FILES="shark"
ZERO_SPECIFIC_FILES="zero"
@ -119,11 +120,11 @@ Src_Files_EXCLUDE="jsig.c jvmtiEnvRecommended.cpp jvmtiEnvStub.cpp"
# Exclude per type.
case "${TYPE}" in
"compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
"compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
"compiler2") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;;
"tiered") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;;
"zero") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
"shark") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;;
"zero") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
"shark") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;;
esac
# Special handling of arch model.

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -31,6 +31,7 @@ CXX=cl.exe
# /nologo Suppress copyright message at every cl.exe startup
# /W3 Warning level 3
# /Zi Include debugging information
# /d2Zi+ Extended debugging symbols for optimized code (/Zo in VS2013 Update 3 and later)
# /WX Treat any warning error as a fatal error
# /MD Use dynamic multi-threaded runtime (msvcrt.dll or msvc*NN.dll)
# /MTd Use static multi-threaded runtime debug versions
@ -57,7 +58,7 @@ CXX_FLAGS=$(EXTRA_CFLAGS) /nologo /W3 /WX
# Let's add debug information when Full Debug Symbols is enabled
!if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
CXX_FLAGS=$(CXX_FLAGS) /Zi
CXX_FLAGS=$(CXX_FLAGS) /Zi /d2Zi+
!endif
# Based on BUILDARCH we add some flags and select the default compiler name

View File

@ -145,6 +145,10 @@ ProjectCreatorIDEOptionsIgnoreCompiler1=\
-ignorePath_TARGET tiered \
-ignorePath_TARGET c1_
ProjectCreatorIDEOptionsIgnoreJVMCI=\
-ignorePath_TARGET src/share/vm/jvmci \
-ignorePath_TARGET vm/jvmci
ProjectCreatorIDEOptionsIgnoreCompiler2=\
-ignorePath_TARGET compiler2 \
-ignorePath_TARGET tiered \
@ -165,6 +169,8 @@ ProjectCreatorIDEOptionsIgnoreCompiler2=\
##################################################
ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \
-define_compiler1 COMPILER1 \
-define_compiler1 INCLUDE_JVMCI=0 \
$(ProjectCreatorIDEOptionsIgnoreJVMCI:TARGET=compiler1) \
$(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=compiler1)
##################################################

View File

@ -40,7 +40,7 @@ CXX_FLAGS=$(CXX_FLAGS) /homeparams
!endif
!if "$(Variant)" == "compiler1"
CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1"
CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" /D INCLUDE_JVMCI=0
!endif
!if "$(Variant)" == "compiler2"
@ -152,6 +152,7 @@ VM_PATH=$(VM_PATH);../generated/adfiles
VM_PATH=$(VM_PATH);../generated/jvmtifiles
VM_PATH=$(VM_PATH);../generated/tracefiles
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/c1
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/jvmci
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/compiler
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/code
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/interpreter
@ -163,6 +164,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/serial
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/cms
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/g1
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/logging
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/oops
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims
@ -232,6 +234,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi
{$(COMMONSRC)\share\vm\classfile}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(COMMONSRC)\share\vm\jvmci}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(COMMONSRC)\share\vm\gc\parallel}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
@ -250,6 +255,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi
{$(COMMONSRC)\share\vm\asm}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(COMMONSRC)\share\vm\logging}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(COMMONSRC)\share\vm\memory}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
@ -330,6 +338,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi
{$(ALTSRC)\share\vm\asm}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(ALTSRC)\share\vm\logging}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
{$(ALTSRC)\share\vm\memory}.cpp.obj::
$(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<

File diff suppressed because it is too large.

View File

@ -2311,6 +2311,12 @@ public:
#define MSG "invalid arrangement"
#define ASSERTION (T == T2S || T == T4S || T == T2D)
INSN(fsqrt, 1, 0b11111);
INSN(fabs, 0, 0b01111);
INSN(fneg, 1, 0b01111);
#undef ASSERTION
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
INSN(rev64, 0, 0b00000);
#undef ASSERTION

View File

@ -68,10 +68,11 @@ define_pd_global(intx, RegisterCostAreaRatio, 16000);
// Peephole and CISC spilling both break the graph, and so makes the
// scheduler sick.
define_pd_global(bool, OptoPeephole, true);
define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);

View File

@ -51,13 +51,15 @@ bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// ----------------------------------------------------------------------------
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
// Stub is fixed up when the corresponding call is converted from
// calling compiled code to calling interpreted code.
// mov rmethod, 0
// jmp -4 # to self
address mark = cbuf.insts_mark(); // Get mark within main instrs section.
if (mark == NULL) {
mark = cbuf.insts_mark(); // Get mark within main instrs section.
}
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a stub.

View File

@ -30,5 +30,6 @@
void generate_more_monitors();
void generate_deopt_handling();
void lock_method(void);
#endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP

View File

@ -42,6 +42,11 @@
// Implementation of InterpreterMacroAssembler
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
b(entry);
}
#ifndef CC_INTERP
void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
@ -1542,14 +1547,14 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca
if (MethodData::profile_arguments()) {
Label done;
int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
add(mdp, mdp, off_to_args);
for (int i = 0; i < TypeProfileArgsLimit; i++) {
if (i > 0 || MethodData::profile_return()) {
// If return value type is profiled we may have no argument to profile
ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count());
cmp(tmp, TypeStackSlotEntries::per_arg_count());
add(rscratch1, mdp, off_to_args);
br(Assembler::LT, done);
}
ldr(tmp, Address(callee, Method::const_offset()));
@ -1557,26 +1562,27 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca
// stack offset o (zero based) from the start of the argument
// list, for n arguments translates into offset n - o - 1 from
// the end of the argument list
ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))));
sub(tmp, tmp, rscratch1);
sub(tmp, tmp, 1);
Address arg_addr = argument_address(tmp);
ldr(tmp, arg_addr);
Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i)));
profile_obj_type(tmp, mdo_arg_addr);
int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
add(mdp, mdp, to_add);
off_to_args += to_add;
}
if (MethodData::profile_return()) {
ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
}
add(rscratch1, mdp, off_to_args);
bind(done);
mov(mdp, rscratch1);
if (MethodData::profile_return()) {
// We're right after the type profile for the last

View File

@ -66,6 +66,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
void load_earlyret_value(TosState state);
void jump_to_entry(address entry);
#ifdef CC_INTERP
void save_bcp() { /* not needed in c++ interpreter and harmless */ }
void restore_bcp() { /* not needed in c++ interpreter and harmless */ }

View File

@ -41,14 +41,13 @@ private:
address generate_native_entry(bool synchronized);
address generate_abstract_entry(void);
address generate_math_entry(AbstractInterpreter::MethodKind kind);
address generate_jump_to_normal_entry(void);
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_accessor_entry(void) { return NULL; }
address generate_empty_entry(void) { return NULL; }
void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
address generate_Reference_get_entry();
address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
void lock_method(void);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
void generate_stack_overflow_check(void);
void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);

View File

@ -236,17 +236,6 @@ void InterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::Me
__ blrt(rscratch1, gpargs, fpargs, rtype);
}
// Jump into normal path for accessor and empty entry to jump to normal entry
// The "fast" optimization don't update compilation count therefore can disable inlining
// for these functions that should be inlined.
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry_point = __ pc();
assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
__ b(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry_point;
}
// Abstract method entry
// Attempt to execute abstract method. Throw exception
address InterpreterGenerator::generate_abstract_entry(void) {

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_aarch64.inline.hpp"
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
Unimplemented();
return 0;
}
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
Unimplemented();
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
Unimplemented();
}
void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
Unimplemented();
}
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
Unimplemented();
}
void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
Unimplemented();
}
void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
Unimplemented();
}
// convert JVMCI register indices (as used in oop maps) to HotSpot registers
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
return NULL;
}
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
return false;
}

View File

@ -1709,6 +1709,20 @@ int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
return idivq_offset;
}
void MacroAssembler::membar(Membar_mask_bits order_constraint) {
address prev = pc() - NativeMembar::instruction_size;
if (prev == code()->last_membar()) {
NativeMembar *bar = NativeMembar_at(prev);
// We are merging two memory barrier instructions. On AArch64 we
// can do this simply by ORing them together.
bar->set_kind(bar->get_kind() | order_constraint);
BLOCK_COMMENT("merged membar");
} else {
code()->set_last_membar(pc());
dmb(Assembler::barrier(order_constraint));
}
}
// MacroAssembler routines found actually to be needed
void MacroAssembler::push(Register src)
@ -2238,7 +2252,7 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
ttyLocker ttyl;
::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
msg);
assert(false, err_msg("DEBUG MESSAGE: %s", msg));
assert(false, "DEBUG MESSAGE: %s", msg);
}
}
@ -2286,18 +2300,30 @@ void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_t
}
#endif
void MacroAssembler::push_CPU_state() {
push(0x3fffffff, sp); // integer registers except lr & sp
void MacroAssembler::push_CPU_state(bool save_vectors) {
push(0x3fffffff, sp); // integer registers except lr & sp
if (!save_vectors) {
for (int i = 30; i >= 0; i -= 2)
stpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -2 * wordSize)));
} else {
for (int i = 30; i >= 0; i -= 2)
stpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -4 * wordSize)));
}
}
void MacroAssembler::pop_CPU_state() {
for (int i = 0; i < 32; i += 2)
ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 2 * wordSize)));
void MacroAssembler::pop_CPU_state(bool restore_vectors) {
if (!restore_vectors) {
for (int i = 0; i < 32; i += 2)
ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 2 * wordSize)));
} else {
for (int i = 0; i < 32; i += 2)
ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 4 * wordSize)));
}
pop(0x3fffffff, sp); // integer registers except lr & sp
}
@ -3027,6 +3053,24 @@ SkipIfEqual::~SkipIfEqual() {
_masm->bind(_label);
}
void MacroAssembler::addptr(const Address &dst, int32_t src) {
Address adr;
switch(dst.getMode()) {
case Address::base_plus_offset:
// This is the expected mode, although we allow all the other
// forms below.
adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
break;
default:
lea(rscratch2, dst);
adr = Address(rscratch2);
break;
}
ldr(rscratch1, adr);
add(rscratch1, rscratch1, src);
str(rscratch1, adr);
}
void MacroAssembler::cmpptr(Register src1, Address src2) {
unsigned long offset;
adrp(rscratch1, src2, offset);
@ -3063,11 +3107,15 @@ void MacroAssembler::store_check(Register obj) {
if (UseCondCardMark) {
Label L_already_dirty;
membar(StoreLoad);
ldrb(rscratch2, Address(obj, rscratch1));
cbz(rscratch2, L_already_dirty);
strb(zr, Address(obj, rscratch1));
bind(L_already_dirty);
} else {
if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
membar(StoreStore);
}
strb(zr, Address(obj, rscratch1));
}
}
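The store_check change above adds a conditional card mark: with UseCondCardMark the card byte is loaded first (after a StoreLoad barrier) and the store of the dirty value is skipped when the card is already dirty, avoiding repeated writes to hot card-table cache lines. A rough Java model of that policy, not VM code, assuming the dirty value is 0 to match the strb zr store and using hypothetical names:

// Illustrative only: mirrors the control flow generated by store_check above.
public class CardMarkSketch {
    static final byte DIRTY = 0;                 // matches "strb zr", i.e. a store of zero

    static void storeCheck(byte[] cardTable, int cardIndex, boolean useCondCardMark) {
        if (useCondCardMark) {
            // The generated code issues membar(StoreLoad) before this load.
            if (cardTable[cardIndex] == DIRTY) {
                return;                          // already dirty: skip the write
            }
        }
        // In the generated code the unconditional path may first add a StoreStore
        // barrier when CMS precleaning is enabled.
        cardTable[cardIndex] = DIRTY;            // dirty the card
    }
}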

View File

@ -152,6 +152,13 @@ class MacroAssembler: public Assembler {
strw(scratch, a);
}
void bind(Label& L) {
Assembler::bind(L);
code()->clear_last_membar();
}
void membar(Membar_mask_bits order_constraint);
// Frame creation and destruction shared between JITs.
void build_frame(int framesize);
void remove_frame(int framesize);
@ -777,8 +784,8 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);)
void push_CPU_state();
void pop_CPU_state() ;
void push_CPU_state(bool save_vectors = false);
void pop_CPU_state(bool restore_vectors = false) ;
// Round up to a power of two
void round_to(Register reg, int modulus);
@ -908,13 +915,7 @@ public:
// Arithmetics
void addptr(Address dst, int32_t src) {
lea(rscratch2, dst);
ldr(rscratch1, Address(rscratch2));
add(rscratch1, rscratch1, src);
str(rscratch1, Address(rscratch2));
}
void addptr(const Address &dst, int32_t src);
void cmpptr(Register src1, Address src2);
// Various forms of CAS

View File

@ -50,7 +50,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_
#ifdef ASSERT
static int check_nonzero(const char* xname, int x) {
assert(x != 0, err_msg("%s should be nonzero", xname));
assert(x != 0, "%s should be nonzero", xname);
return x;
}
#define NONZERO(x) check_nonzero(#x, x)
@ -407,7 +407,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
}
default:
fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
break;
}

View File

@ -101,6 +101,12 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
static bool maybe_cpool_ref(address instr) {
return is_adrp_at(instr) || is_ldr_literal_at(instr);
}
bool is_Membar() {
unsigned int insn = uint_at(0);
return Instruction_aarch64::extract(insn, 31, 12) == 0b11010101000000110011 &&
Instruction_aarch64::extract(insn, 7, 0) == 0b10111111;
}
};
inline NativeInstruction* nativeInstruction_at(address address) {
@ -487,4 +493,15 @@ inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
return (NativeCallTrampolineStub*)addr;
}
class NativeMembar : public NativeInstruction {
public:
unsigned int get_kind() { return Instruction_aarch64::extract(uint_at(0), 11, 8); }
void set_kind(int order_kind) { Instruction_aarch64::patch(addr_at(0), 11, 8, order_kind); }
};
inline NativeMembar *NativeMembar_at(address addr) {
assert(nativeInstruction_at(addr)->is_Membar(), "no membar found");
return (NativeMembar*)addr;
}
#endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
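MacroAssembler::membar above merges back-to-back barriers by ORing the new order constraint into the kind field of the previous dmb, and NativeMembar exposes that field as instruction bits 11:8 (is_Membar checks the fixed dmb bits around it). The merge works cleanly if, as the code comment implies, the kinds in play are the inner-shareable DMB options, since ORing their encodings always yields an option at least as strong. A small Java illustration of the bit arithmetic, using the architectural DMB option encodings; the exact mapping of HotSpot's Membar_mask_bits onto these options is assumed here:

public class MembarMergeSketch {
    // Architectural DMB option encodings carried in instruction bits 11:8.
    static final int ISHLD = 0b1001;   // inner shareable, loads
    static final int ISHST = 0b1010;   // inner shareable, stores
    static final int ISH   = 0b1011;   // inner shareable, loads and stores

    public static void main(String[] args) {
        // ORing a load-only and a store-only barrier gives the full barrier,
        // which is what lets membar() patch the previous dmb instead of emitting another.
        System.out.println((ISHLD | ISHST) == ISH);        // true
        System.out.println((ISH | ISHLD) == ISH);          // true

        // NativeMembar::get_kind/set_kind operate on the same 4-bit field.
        int insn = 0xD5033BBF;                             // "dmb ish": fixed bits plus kind 0b1011
        int kind = (insn >>> 8) & 0xF;                     // extract bits 11:8
        int merged = (insn & ~0xF00) | ((kind | ISHST) << 8);
        System.out.println(((merged >>> 8) & 0xF) == ISH); // still a full inner-shareable barrier
    }
}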

View File

@ -102,12 +102,5 @@ void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffe
}
}
void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
if (NativeInstruction::maybe_cpool_ref(addr())) {
address old_addr = old_addr_for(addr(), src, dest);
MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
}
}
void metadata_Relocation::pd_fix_value(address x) {
}

View File

@ -75,8 +75,8 @@ class SimpleRuntimeFrame {
// FIXME -- this is used by C1
class RegisterSaver {
public:
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
static void restore_live_registers(MacroAssembler* masm);
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
// Offsets into the register save area
// Used by deoptimization when it is managing result register
@ -108,7 +108,17 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
#ifdef COMPILER2
if (save_vectors) {
// Save upper half of vector registers
int vect_words = 32 * 8 / wordSize;
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2");
#endif
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
reg_save_size*BytesPerInt, 16);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words
@ -122,7 +132,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save registers, fpu state, and flags.
__ enter();
__ push_CPU_state();
__ push_CPU_state(save_vectors);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@ -139,14 +149,14 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// register slots are 8 bytes
// wide, 32 floating-point
// registers
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
r->as_VMReg());
}
}
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i);
int sp_offset = 2 * i;
int sp_offset = save_vectors ? (4 * i) : (2 * i);
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
}
@ -154,8 +164,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
return oop_map;
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
__ pop_CPU_state();
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#ifndef COMPILER2
assert(!restore_vectors, "vectors are generated only by C2");
#endif
__ pop_CPU_state(restore_vectors);
__ leave();
}
@ -177,9 +190,9 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
}
// Is vector's size (in bytes) bigger than a size saved by default?
// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
// 8 bytes vector registers are saved by default on AArch64.
bool SharedRuntime::is_wide_vector(int size) {
return size > 16;
return size > 8;
}
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
@ -460,11 +473,11 @@ static void gen_c2i_adapter(MacroAssembler *masm,
}
static void gen_i2c_adapter(MacroAssembler *masm,
int total_args_passed,
int comp_args_on_stack,
const BasicType *sig_bt,
const VMRegPair *regs) {
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
int total_args_passed,
int comp_args_on_stack,
const BasicType *sig_bt,
const VMRegPair *regs) {
// Note: r13 contains the senderSP on entry. We must preserve it since
// we may do a i2c -> c2i transition if we lose a race where compiled
@ -1146,7 +1159,7 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs,
assert((unsigned)gpargs < 256, "eek!");
assert((unsigned)fpargs < 32, "eek!");
__ lea(rscratch1, RuntimeAddress(dest));
__ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
if (UseBuiltinSim) __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
__ blrt(rscratch1, rscratch2);
__ maybe_isb();
}
@ -1194,7 +1207,7 @@ static void gen_special_dispatch(MacroAssembler* masm,
} else if (iid == vmIntrinsics::_invokeBasic) {
has_receiver = true;
} else {
fatal(err_msg_res("unexpected intrinsic id %d", iid));
fatal("unexpected intrinsic id %d", iid);
}
if (member_reg != noreg) {
@ -1521,14 +1534,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start;
// Generate stack overflow check
// If we have to make this method not-entrant we'll overwrite its
// first instruction with a jump. For this action to be legal we
// must ensure that this first instruction is a B, BL, NOP, BKPT,
// SVC, HVC, or SMC. Make it a NOP.
__ nop();
// Generate stack overflow check
if (UseStackBanging) {
__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
} else {
@ -1709,23 +1721,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// need to spill before we call out
int c_arg = total_c_args - total_in_args;
// Pre-load a static method's oop into r20. Used both by locking code and
// the normal JNI call code.
// Pre-load a static method's oop into c_rarg1.
if (method->is_static() && !is_critical_native) {
// load oop into a register
__ movoop(oop_handle_reg,
__ movoop(c_rarg1,
JNIHandles::make_local(method->method_holder()->java_mirror()),
/*immediate*/true);
// Now handlize the static class mirror it's known not-null.
__ str(oop_handle_reg, Address(sp, klass_offset));
__ str(c_rarg1, Address(sp, klass_offset));
map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
// Now get the handle
__ lea(oop_handle_reg, Address(sp, klass_offset));
// store the klass handle as second argument
__ mov(c_rarg1, oop_handle_reg);
__ lea(c_rarg1, Address(sp, klass_offset));
// and protect the arg if we must spill
c_arg--;
}
@ -1740,19 +1749,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1);
// We have all of the arguments setup at this point. We must not touch any register
// argument registers at this point (what if we save/restore them there are no oop?
Label dtrace_method_entry, dtrace_method_entry_done;
{
SkipIfEqual skip(masm, &DTraceMethodProbes, false);
// protect the args we've loaded
save_args(masm, total_c_args, c_arg, out_regs);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
rthread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
unsigned long offset;
__ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
__ ldrb(rscratch1, Address(rscratch1, offset));
__ cbnzw(rscratch1, dtrace_method_entry);
__ bind(dtrace_method_entry_done);
}
// RedefineClasses() tracing support for obsolete method entry
@ -1782,7 +1785,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
if (method->is_synchronized()) {
assert(!is_critical_native, "unhandled");
const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
// Get the handle (the 2nd argument)
@ -1838,7 +1840,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Finally just about ready to make the JNI call
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
__ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
@ -1879,9 +1880,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Unpack native results.
switch (ret_type) {
case T_BOOLEAN: __ ubfx(r0, r0, 0, 8); break;
case T_BOOLEAN: __ ubfx(r0, r0, 0, 8); break;
case T_CHAR : __ ubfx(r0, r0, 0, 16); break;
case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
case T_SHORT : __ sbfx(r0, r0, 0, 16); break;
case T_INT : __ sbfx(r0, r0, 0, 32); break;
case T_DOUBLE :
@ -1904,14 +1905,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization is progress, and escapes.
__ mov(rscratch1, _thread_in_native_trans);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
if(os::is_MP()) {
if (UseMembar) {
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
// Force this write out before the read below
__ dmb(Assembler::SY);
} else {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
@ -1920,54 +1924,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
}
Label after_transition;
// check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{
Label Continue;
{ unsigned long offset;
__ adrp(rscratch1,
ExternalAddress((address)SafepointSynchronize::address_of_state()),
offset);
__ ldrw(rscratch1, Address(rscratch1, offset));
}
__ cmpw(rscratch1, SafepointSynchronize::_not_synchronized);
Label L;
__ br(Assembler::NE, L);
assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
unsigned long offset;
__ adrp(rscratch1,
ExternalAddress((address)SafepointSynchronize::address_of_state()),
offset);
__ ldrw(rscratch1, Address(rscratch1, offset));
__ cbnzw(rscratch1, safepoint_in_progress);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch1, Continue);
__ bind(L);
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
//
save_native_result(masm, ret_type, stack_slots);
__ mov(c_rarg0, rthread);
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
if (!is_critical_native) {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ blrt(rscratch1, 1, 0, 1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
if (is_critical_native) {
// The call above performed the transition to thread_in_Java so
// skip the transition logic below.
__ b(after_transition);
}
__ bind(Continue);
__ cbnzw(rscratch1, safepoint_in_progress);
__ bind(safepoint_in_progress_done);
}
// change thread state
Label after_transition;
__ mov(rscratch1, _thread_in_Java);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
@ -2024,16 +1997,15 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
__ bind(done);
}
Label dtrace_method_exit, dtrace_method_exit_done;
{
SkipIfEqual skip(masm, &DTraceMethodProbes, false);
save_native_result(masm, ret_type, stack_slots);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
rthread, c_rarg1);
restore_native_result(masm, ret_type, stack_slots);
unsigned long offset;
__ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
__ ldrb(rscratch1, Address(rscratch1, offset));
__ cbnzw(rscratch1, dtrace_method_exit);
__ bind(dtrace_method_exit_done);
}
__ reset_last_Java_frame(false, true);
@ -2082,7 +2054,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Slow path locking & unlocking
if (method->is_synchronized()) {
// BEGIN Slow path lock
__ block_comment("Slow path lock {");
__ bind(slow_path_lock);
// has last_Java_frame setup. No exceptions so do vanilla call not call_VM
@ -2109,9 +2081,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
#endif
__ b(lock_done);
// END Slow path lock
__ block_comment("} Slow path lock");
// BEGIN Slow path unlock
__ block_comment("Slow path unlock {");
__ bind(slow_path_unlock);
// If we haven't already saved the native result we must save it now as xmm registers
@ -2149,7 +2121,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
__ b(unlock_done);
// END Slow path unlock
__ block_comment("} Slow path unlock");
} // synchronized
@ -2162,6 +2134,69 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// and continue
__ b(reguard_done);
// SLOW PATH safepoint
{
__ block_comment("safepoint {");
__ bind(safepoint_in_progress);
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
//
save_native_result(masm, ret_type, stack_slots);
__ mov(c_rarg0, rthread);
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
if (!is_critical_native) {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ blrt(rscratch1, 1, 0, 1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
if (is_critical_native) {
// The call above performed the transition to thread_in_Java so
// skip the transition logic above.
__ b(after_transition);
}
__ b(safepoint_in_progress_done);
__ block_comment("} safepoint");
}
// SLOW PATH dtrace support
{
__ block_comment("dtrace entry {");
__ bind(dtrace_method_entry);
// We have all of the arguments setup at this point. We must not touch any register
// argument registers at this point (what if we save/restore them there are no oop?
save_args(masm, total_c_args, c_arg, out_regs);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
rthread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
__ b(dtrace_method_entry_done);
__ block_comment("} dtrace entry");
}
{
__ block_comment("dtrace exit {");
__ bind(dtrace_method_exit);
save_native_result(masm, ret_type, stack_slots);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
rthread, c_rarg1);
restore_native_result(masm, ret_type, stack_slots);
__ b(dtrace_method_exit_done);
__ block_comment("} dtrace exit");
}
__ flush();
@ -2742,7 +2777,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Save registers, fpu state, and flags
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
@ -2793,7 +2828,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ bind(noException);
// Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm);
RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(lr);
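The RegisterSaver and push_CPU_state/pop_CPU_state changes above widen the save area when save_vectors is set: stpq stores the full 16-byte SIMD registers instead of the 8-byte d registers, which is also why is_wide_vector now treats anything over 8 bytes as wide. A quick illustrative check of the arithmetic behind the new constants (not VM code):

public class VectorSaveLayoutSketch {
    public static void main(String[] args) {
        int wordSize = 8;                         // bytes per machine word on AArch64
        int dRegBytes = 8, qRegBytes = 16;        // default save vs. full vector save per register

        // save_vectors adds 8 extra bytes for each of the 32 SIMD registers.
        int vectWords = 32 * 8 / wordSize;
        System.out.println(vectWords);            // 32, matching "32 * 8 / wordSize" above

        // Oop map offsets are in 4-byte compiler stack slots, so a register spans
        // 2 slots in the default layout and 4 slots when vectors are saved,
        // matching "sp_offset = save_vectors ? (4 * i) : (2 * i)".
        int slotSize = 4;
        System.out.println(dRegBytes / slotSize); // 2
        System.out.println(qRegBytes / slotSize); // 4
    }
}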

View File

@ -746,6 +746,9 @@ class StubGenerator: public StubCodeGenerator {
const Register count = end; // 'end' register contains bytes count now
__ mov(scratch, (address)ct->byte_map_base);
__ add(start, start, scratch);
if (UseConcMarkSweepGC) {
__ membar(__ StoreStore);
}
__ BIND(L_loop);
__ strb(zr, Address(start, count));
__ subs(count, count, 1);
@ -2395,6 +2398,274 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/***
* Arguments:
*
* Inputs:
* c_rarg0 - int adler
* c_rarg1 - byte* buff
* c_rarg2 - int len
*
* Output:
* c_rarg0 - int adler result
*/
address generate_updateBytesAdler32() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
address start = __ pc();
Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1;
// Aliases
Register adler = c_rarg0;
Register s1 = c_rarg0;
Register s2 = c_rarg3;
Register buff = c_rarg1;
Register len = c_rarg2;
Register nmax = r4;
Register base = r5;
Register count = r6;
Register temp0 = rscratch1;
Register temp1 = rscratch2;
Register temp2 = r7;
// Max number of bytes we can process before having to take the mod
// 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
unsigned long BASE = 0xfff1;
unsigned long NMAX = 0x15B0;
__ mov(base, BASE);
__ mov(nmax, NMAX);
// s1 is initialized to the lower 16 bits of adler
// s2 is initialized to the upper 16 bits of adler
__ ubfx(s2, adler, 16, 16); // s2 = ((adler >> 16) & 0xffff)
__ uxth(s1, adler); // s1 = (adler & 0xffff)
// The pipelined loop needs at least 16 elements for 1 iteration
// It does check this, but it is more effective to skip to the cleanup loop
__ cmp(len, 16);
__ br(Assembler::HS, L_nmax);
__ cbz(len, L_combine);
__ bind(L_simple_by1_loop);
__ ldrb(temp0, Address(__ post(buff, 1)));
__ add(s1, s1, temp0);
__ add(s2, s2, s1);
__ subs(len, len, 1);
__ br(Assembler::HI, L_simple_by1_loop);
// s1 = s1 % BASE
__ subs(temp0, s1, base);
__ csel(s1, temp0, s1, Assembler::HS);
// s2 = s2 % BASE
__ lsr(temp0, s2, 16);
__ lsl(temp1, temp0, 4);
__ sub(temp1, temp1, temp0);
__ add(s2, temp1, s2, ext::uxth);
__ subs(temp0, s2, base);
__ csel(s2, temp0, s2, Assembler::HS);
__ b(L_combine);
__ bind(L_nmax);
__ subs(len, len, nmax);
__ sub(count, nmax, 16);
__ br(Assembler::LO, L_by16);
__ bind(L_nmax_loop);
__ ldp(temp0, temp1, Address(__ post(buff, 16)));
__ add(s1, s1, temp0, ext::uxtb);
__ ubfx(temp2, temp0, 8, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 16, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 24, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 32, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 40, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 48, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ add(s2, s2, s1);
__ add(s1, s1, temp0, Assembler::LSR, 56);
__ add(s2, s2, s1);
__ add(s1, s1, temp1, ext::uxtb);
__ ubfx(temp2, temp1, 8, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 16, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 24, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 32, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 40, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 48, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ add(s2, s2, s1);
__ add(s1, s1, temp1, Assembler::LSR, 56);
__ add(s2, s2, s1);
__ subs(count, count, 16);
__ br(Assembler::HS, L_nmax_loop);
// s1 = s1 % BASE
__ lsr(temp0, s1, 16);
__ lsl(temp1, temp0, 4);
__ sub(temp1, temp1, temp0);
__ add(temp1, temp1, s1, ext::uxth);
__ lsr(temp0, temp1, 16);
__ lsl(s1, temp0, 4);
__ sub(s1, s1, temp0);
__ add(s1, s1, temp1, ext:: uxth);
__ subs(temp0, s1, base);
__ csel(s1, temp0, s1, Assembler::HS);
// s2 = s2 % BASE
__ lsr(temp0, s2, 16);
__ lsl(temp1, temp0, 4);
__ sub(temp1, temp1, temp0);
__ add(temp1, temp1, s2, ext::uxth);
__ lsr(temp0, temp1, 16);
__ lsl(s2, temp0, 4);
__ sub(s2, s2, temp0);
__ add(s2, s2, temp1, ext:: uxth);
__ subs(temp0, s2, base);
__ csel(s2, temp0, s2, Assembler::HS);
__ subs(len, len, nmax);
__ sub(count, nmax, 16);
__ br(Assembler::HS, L_nmax_loop);
__ bind(L_by16);
__ adds(len, len, count);
__ br(Assembler::LO, L_by1);
__ bind(L_by16_loop);
__ ldp(temp0, temp1, Address(__ post(buff, 16)));
__ add(s1, s1, temp0, ext::uxtb);
__ ubfx(temp2, temp0, 8, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 16, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 24, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 32, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 40, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp0, 48, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ add(s2, s2, s1);
__ add(s1, s1, temp0, Assembler::LSR, 56);
__ add(s2, s2, s1);
__ add(s1, s1, temp1, ext::uxtb);
__ ubfx(temp2, temp1, 8, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 16, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 24, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 32, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 40, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ ubfx(temp2, temp1, 48, 8);
__ add(s2, s2, s1);
__ add(s1, s1, temp2);
__ add(s2, s2, s1);
__ add(s1, s1, temp1, Assembler::LSR, 56);
__ add(s2, s2, s1);
__ subs(len, len, 16);
__ br(Assembler::HS, L_by16_loop);
__ bind(L_by1);
__ adds(len, len, 15);
__ br(Assembler::LO, L_do_mod);
__ bind(L_by1_loop);
__ ldrb(temp0, Address(__ post(buff, 1)));
__ add(s1, temp0, s1);
__ add(s2, s2, s1);
__ subs(len, len, 1);
__ br(Assembler::HS, L_by1_loop);
__ bind(L_do_mod);
// s1 = s1 % BASE
__ lsr(temp0, s1, 16);
__ lsl(temp1, temp0, 4);
__ sub(temp1, temp1, temp0);
__ add(temp1, temp1, s1, ext::uxth);
__ lsr(temp0, temp1, 16);
__ lsl(s1, temp0, 4);
__ sub(s1, s1, temp0);
__ add(s1, s1, temp1, ext:: uxth);
__ subs(temp0, s1, base);
__ csel(s1, temp0, s1, Assembler::HS);
// s2 = s2 % BASE
__ lsr(temp0, s2, 16);
__ lsl(temp1, temp0, 4);
__ sub(temp1, temp1, temp0);
__ add(temp1, temp1, s2, ext::uxth);
__ lsr(temp0, temp1, 16);
__ lsl(s2, temp0, 4);
__ sub(s2, s2, temp0);
__ add(s2, s2, temp1, ext:: uxth);
__ subs(temp0, s2, base);
__ csel(s2, temp0, s2, Assembler::HS);
// Combine lower bits and higher bits
__ bind(L_combine);
__ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16)
__ ret(lr);
return start;
}
/**
* Arguments:
*
@ -3613,6 +3884,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
}
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
// Safefetch stubs.
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
&StubRoutines::_safefetch32_fault_pc,
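The generate_updateBytesAdler32 stub above relies on two number-theory facts that are easy to check outside the VM: NMAX = 0x15B0 = 5552 is the largest n for which 255n(n+1)/2 + (n+1)(BASE-1) still fits in 32 bits, so the mod can be deferred for whole 5552-byte blocks, and because 2^16 ≡ 15 (mod 65521) the lsr/lsl/sub/add sequences reduce s1 and s2 with shifts plus one conditional subtract instead of a division. A self-contained Java check of both facts and of the final combine adler = s1 | (s2 << 16), illustrative only and not the stub itself:

public class Adler32ConstantsCheck {
    static final long BASE = 0xFFF1;    // 65521, the Adler-32 modulus
    static final long NMAX = 0x15B0;    // 5552

    // Fold x mod 65521 using 2^16 = 65521 + 15, i.e. x ≡ 15*(x>>>16) + (x & 0xffff) (mod BASE).
    static long mod65521(long x) {
        x = 15 * (x >>> 16) + (x & 0xFFFF);   // first fold: result fits well under 2^21 for 32-bit input
        x = 15 * (x >>> 16) + (x & 0xFFFF);   // second fold: result is below 2*BASE
        return x >= BASE ? x - BASE : x;      // single conditional subtract, like the csel above
    }

    public static void main(String[] args) {
        // NMAX is the last n where the unreduced sums still fit in an unsigned 32-bit value.
        long atNmax   = 255 * NMAX * (NMAX + 1) / 2 + (NMAX + 1) * (BASE - 1);
        long pastNmax = 255 * (NMAX + 1) * (NMAX + 2) / 2 + (NMAX + 2) * (BASE - 1);
        System.out.println(atNmax <= 0xFFFFFFFFL);    // true
        System.out.println(pastNmax <= 0xFFFFFFFFL);  // false

        // The shift-based reduction agrees with a plain remainder.
        for (long x : new long[] {0, 1, 0xFFF0, 0xFFF1, 123_456_789L, 0xFFFFFFFFL}) {
            if (mod65521(x) != x % BASE) throw new AssertionError("x = " + x);
        }

        // Final combine: the two running sums are packed back into one 32-bit value.
        long s1 = 1, s2 = 0;
        System.out.println((s1 | (s2 << 16)) == 1L);  // the initial Adler-32 value
    }
}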

View File

@ -535,7 +535,7 @@ void InterpreterGenerator::generate_stack_overflow_check(void) {
// r0
// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
// rscratch1, rscratch2 (scratch regs)
void InterpreterGenerator::lock_method(void) {
void TemplateInterpreterGenerator::lock_method() {
// synchronize method
const Address access_flags(rmethod, Method::access_flags_offset());
const Address monitor_block_top(
@ -721,8 +721,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// generate a vanilla interpreter entry as the slow path
__ bind(slow_path);
(void) generate_normal_entry(false);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry;
}
#endif // INCLUDE_ALL_GCS
@ -779,12 +778,10 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// generate a vanilla native entry as the slow path
__ bind(slow_path);
(void) generate_native_entry(false);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
return entry;
}
return generate_native_entry(false);
return NULL;
}
/**
@ -841,12 +838,10 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// generate a vanilla native entry as the slow path
__ bind(slow_path);
(void) generate_native_entry(false);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
return entry;
}
return generate_native_entry(false);
return NULL;
}
void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {

View File

@ -178,9 +178,8 @@ void VM_Version::get_processor_features() {
warning("UseCRC32 specified, but not supported on this CPU");
}
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
}
if (auxv & HWCAP_AES) {

View File

@ -60,6 +60,7 @@ define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
// GL:
// Detected a problem with unscaled compressed oops and
// narrow_oop_use_complex_address() == false.

View File

@ -94,10 +94,12 @@ bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
const int IC_pos_in_java_to_interp_stub = 8;
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) {
#ifdef COMPILER2
// Get the mark within main instrs section which is set to the address of the call.
address call_addr = cbuf.insts_mark();
if (mark == NULL) {
// Get the mark within main instrs section which is set to the address of the call.
mark = cbuf.insts_mark();
}
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a stub.
@ -117,7 +119,7 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
// Create a static stub relocation which relates this stub
// with the call instruction at insts_call_instruction_offset in the
// instructions code-section.
__ relocate(static_stub_Relocation::spec(call_addr));
__ relocate(static_stub_Relocation::spec(mark));
const int stub_start_offset = __ offset();
// Now, create the stub's code:

View File

@ -46,7 +46,7 @@ void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Registe
MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
}
void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) {
void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) {
assert(entry, "Entry must have been generated by now");
if (is_within_range_of_b(entry, pc())) {
b(entry);

View File

@ -39,7 +39,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void null_check_throw(Register a, int offset, Register temp_reg);
void branch_to_entry(address entry, Register Rscratch);
void jump_to_entry(address entry, Register Rscratch);
// Handy address generation macros.
#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread

View File

@ -31,12 +31,12 @@
private:
address generate_abstract_entry(void);
address generate_jump_to_normal_entry(void);
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_accessor_entry(void) { return NULL; }
address generate_empty_entry(void) { return NULL; }
address generate_Reference_get_entry(void);
address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP

View File

@ -427,18 +427,6 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type
return entry;
}
// Call an accessor method (assuming it is resolved, otherwise drop into
// vanilla (slow path) entry.
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry = __ pc();
address normal_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
assert(normal_entry != NULL, "should already be generated.");
__ branch_to_entry(normal_entry, R11_scratch1);
__ flush();
return entry;
}
// Abstract method entry.
//
address InterpreterGenerator::generate_abstract_entry(void) {
@ -529,13 +517,13 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// regular method entry code to generate the NPE.
//
address entry = __ pc();
const int referent_offset = java_lang_ref_Reference::referent_offset;
guarantee(referent_offset > 0, "referent offset not initialized");
if (UseG1GC) {
Label slow_path;
address entry = __ pc();
const int referent_offset = java_lang_ref_Reference::referent_offset;
guarantee(referent_offset > 0, "referent offset not initialized");
Label slow_path;
// Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
@ -577,13 +565,11 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// Generate regular method entry.
__ bind(slow_path);
__ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
__ flush();
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
return entry;
} else {
return generate_jump_to_normal_entry();
}
return NULL;
}
void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_ppc.inline.hpp"
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
Unimplemented();
return 0;
}
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
Unimplemented();
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
Unimplemented();
}
void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
Unimplemented();
}
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
Unimplemented();
}
void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
Unimplemented();
}
void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
Unimplemented();
}
// convert JVMCI register indices (as used in oop maps) to HotSpot registers
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
return NULL;
}
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
return false;
}

View File

@ -594,13 +594,6 @@ void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool
"can't identify emitted call");
} else {
// variant 1:
#if defined(ABI_ELFv2)
nop();
calculate_address_from_global_toc(R12, dest, true, true, false);
mtctr(R12);
nop();
nop();
#else
mr(R0, R11); // spill R11 -> R0.
// Load the destination address into CTR,
@ -610,7 +603,6 @@ void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool
mtctr(R11);
mr(R11, R0); // spill R11 <- R0.
nop();
#endif
// do the call/jump
if (link) {
@ -4292,7 +4284,7 @@ const char* stop_types[] = {
static void stop_on_request(int tp, const char* msg) {
tty->print("PPC assembly code requires stop: (%s) %s\n", stop_types[tp%/*stop_end*/4], msg);
guarantee(false, err_msg("PPC assembly code requires stop: %s", msg));
guarantee(false, "PPC assembly code requires stop: %s", msg);
}
// Call a C-function that prints output.

View File

@ -60,7 +60,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_
#ifdef ASSERT
static int check_nonzero(const char* xname, int x) {
assert(x != 0, err_msg("%s should be nonzero", xname));
assert(x != 0, "%s should be nonzero", xname);
return x;
}
#define NONZERO(x) check_nonzero(#x, x)
@ -434,7 +434,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
}
default:
fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
break;
}

View File

@ -149,7 +149,7 @@ void NativeCall::verify() {
if (!NativeCall::is_call_at(addr)) {
tty->print_cr("not a NativeCall at " PTR_FORMAT, p2i(addr));
// TODO: PPC port: Disassembler::decode(addr - 20, addr + 20, tty);
fatal(err_msg("not a NativeCall at " PTR_FORMAT, p2i(addr)));
fatal("not a NativeCall at " PTR_FORMAT, p2i(addr));
}
}
#endif // ASSERT
@ -162,7 +162,7 @@ void NativeFarCall::verify() {
if (!NativeFarCall::is_far_call_at(addr)) {
tty->print_cr("not a NativeFarCall at " PTR_FORMAT, p2i(addr));
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeFarCall at " PTR_FORMAT, p2i(addr)));
fatal("not a NativeFarCall at " PTR_FORMAT, p2i(addr));
}
}
#endif // ASSERT
@ -308,7 +308,7 @@ void NativeMovConstReg::verify() {
! MacroAssembler::is_bl(*((int*) addr))) {
tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr));
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr)));
fatal("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr));
}
}
}
@ -346,7 +346,7 @@ void NativeJump::verify() {
if (!NativeJump::is_jump_at(addr)) {
tty->print_cr("not a NativeJump at " PTR_FORMAT, p2i(addr));
// TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
fatal(err_msg("not a NativeJump at " PTR_FORMAT, p2i(addr)));
fatal("not a NativeJump at " PTR_FORMAT, p2i(addr));
}
}
#endif // ASSERT

View File

@ -2064,6 +2064,10 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
int Matcher::regnum_to_fpu_offset(int regnum) {
// No user for this method?
Unimplemented();

View File

@ -125,8 +125,5 @@ address Relocation::pd_get_address_from_code() {
void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
}
void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
}
void metadata_Relocation::pd_fix_value(address x) {
}

View File

@ -475,9 +475,8 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
ResourceMark rm;
// Note, MaxVectorSize == 8 on PPC64.
assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
assert(size <= 8, "%d bytes vectors are not supported", size);
return size > 8;
}
#ifdef COMPILER2
@ -957,11 +956,11 @@ static address gen_c2i_adapter(MacroAssembler *masm,
return c2i_entrypoint;
}
static void gen_i2c_adapter(MacroAssembler *masm,
int total_args_passed,
int comp_args_on_stack,
const BasicType *sig_bt,
const VMRegPair *regs) {
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
int total_args_passed,
int comp_args_on_stack,
const BasicType *sig_bt,
const VMRegPair *regs) {
// Load method's entry-point from method.
__ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
@ -1631,7 +1630,7 @@ static void gen_special_dispatch(MacroAssembler* masm,
} else if (iid == vmIntrinsics::_invokeBasic) {
has_receiver = true;
} else {
fatal(err_msg_res("unexpected intrinsic id %d", iid));
fatal("unexpected intrinsic id %d", iid);
}
if (member_reg != noreg) {

View File

@ -841,7 +841,7 @@ class StubGenerator: public StubCodeGenerator {
// Only called by MacroAssembler::verify_oop
static void verify_oop_helper(const char* message, oop o) {
if (!o->is_oop_or_null()) {
fatal(message);
fatal("%s", message);
}
++ StubRoutines::_verify_oop_count;
}

View File

@ -620,7 +620,7 @@ inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
if (!math_entry_available(kind)) {
NOT_PRODUCT(__ should_not_reach_here();)
return Interpreter::entry_for_kind(Interpreter::zerolocals);
return NULL;
}
address entry = __ pc();
@ -1126,14 +1126,6 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals);
#ifdef FAST_DISPATCH
__ unimplemented("Fast dispatch in generate_normal_entry");
#if 0
__ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables);
// Set bytecode dispatch table base.
#endif
#endif
// --------------------------------------------------------------------------
// Zero out non-parameter locals.
// Note: *Always* zero out non-parameter locals as Sparc does. It's not
@ -1266,9 +1258,8 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
* int java.util.zip.CRC32.update(int crc, int b)
*/
address InterpreterGenerator::generate_CRC32_update_entry() {
address start = __ pc(); // Remember stub start address (is rtn value).
if (UseCRC32Intrinsics) {
address start = __ pc(); // Remember stub start address (is rtn value).
Label slow_path;
// Safepoint check
@ -1313,11 +1304,11 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// Generate a vanilla native entry as the slow path.
BLOCK_COMMENT("} CRC32_update");
BIND(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
return start;
}
(void) generate_native_entry(false);
return start;
return NULL;
}
// CRC32 Intrinsics.
@ -1327,9 +1318,8 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
* int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len)
*/
address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
address start = __ pc(); // Remember stub start address (is rtn value).
if (UseCRC32Intrinsics) {
address start = __ pc(); // Remember stub start address (is rtn value).
Label slow_path;
// Safepoint check
@ -1406,11 +1396,11 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// Generate a vanilla native entry as the slow path.
BLOCK_COMMENT("} CRC32_updateBytes(Buffer)");
BIND(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
return start;
}
(void) generate_native_entry(false);
return start;
return NULL;
}
// These should never be compiled since the interpreter will prefer

View File

@ -389,7 +389,7 @@ class Assembler : public AbstractAssembler {
static void assert_signed_range(intptr_t x, int nbits) {
assert(nbits == 32 || (-(1 << nbits-1) <= x && x < ( 1 << nbits-1)),
err_msg("value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits));
"value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits);
}
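A quick worked example of the range being asserted: with nbits = 13 (the simm13 immediates used throughout SPARC encodings) the accepted values are -(1 << 12) .. (1 << 12) - 1, i.e. -4096 through 4095; nbits == 32 is accepted unconditionally.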
static void assert_signed_word_disp_range(intptr_t x, int nbits) {

View File

@ -64,6 +64,7 @@ define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoScheduling, true);
define_pd_global(bool, OptoRegScheduling, false);
#ifdef _LP64
// We need to make sure that all generated code is within

View File

@ -53,14 +53,15 @@ bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// ----------------------------------------------------------------------------
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
#ifdef COMPILER2
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
// Stub is fixed up when the corresponding call is converted from calling
// compiled code to calling interpreted code.
// set (empty), G5
// jmp -1
address mark = cbuf.insts_mark(); // Get mark within main instrs section.
if (mark == NULL) {
mark = cbuf.insts_mark(); // Get mark within main instrs section.
}
MacroAssembler _masm(&cbuf);
@ -80,12 +81,11 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
__ delayed()->nop();
assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size");
// Update current stubs pointer and restore code_end.
__ end_a_stub();
return base;
#else
ShouldNotReachHere();
#endif
}
#undef __

View File

@ -31,6 +31,7 @@
void generate_more_monitors();
void generate_deopt_handling();
void lock_method(void);
void adjust_callers_stack(Register args);
void generate_compute_interpreter_state(const Register state,
const Register prev_state,

View File

@ -468,7 +468,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor
return generate_jump_to_normal_entry();
return NULL;
}
//
@ -1164,7 +1164,7 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register
}
// Find preallocated monitor and lock method (C++ interpreter)
//
void InterpreterGenerator::lock_method(void) {
void CppInterpreterGenerator::lock_method() {
// Lock the current method.
// Destroys registers L2_scratch, L3_scratch, O0
//

View File

@ -82,6 +82,7 @@ define_pd_global(uintx, TypeProfileLevel, 111);
\
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
range(0, 99) \
\
product(bool, UseCBCond, false, \
"Use compare and branch instruction on SPARC") \
@ -91,12 +92,14 @@ define_pd_global(uintx, TypeProfileLevel, 111);
\
product(intx, BlockZeroingLowLimit, 2048, \
"Minimum size in bytes when block zeroing will be used") \
range(1, max_jint) \
\
product(bool, UseBlockCopy, false, \
"Use special cpu instructions for block copy") \
\
product(intx, BlockCopyLowLimit, 2048, \
"Minimum size in bytes when block copy will be used") \
range(1, max_jint) \
\
develop(bool, UseV8InstrsOnly, false, \
"Use SPARC-V8 Compliant instruction subset") \
@ -108,9 +111,11 @@ define_pd_global(uintx, TypeProfileLevel, 111);
"Do not use swap instructions, but only CAS (in a loop) on SPARC")\
\
product(uintx, ArraycopySrcPrefetchDistance, 0, \
"Distance to prefetch source array in arracopy") \
"Distance to prefetch source array in arraycopy") \
constraint(ArraycopySrcPrefetchDistanceConstraintFunc, AfterErgo) \
\
product(uintx, ArraycopyDstPrefetchDistance, 0, \
"Distance to prefetch destination array in arracopy") \
"Distance to prefetch destination array in arraycopy") \
constraint(ArraycopyDstPrefetchDistanceConstraintFunc, AfterErgo) \
#endif // CPU_SPARC_VM_GLOBALS_SPARC_HPP

View File

@ -59,6 +59,13 @@ const Address InterpreterMacroAssembler::d_tmp(FP, (frame::interpreter_frame_d_s
#endif // CC_INTERP
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
AddressLiteral al(entry);
jump_to(al, G3_scratch);
delayed()->nop();
}
void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) {
// Note: this algorithm is also used by C1's OSR entry sequence.
// Any changes should also be applied to CodeEmitter::emit_osr_entry().
@ -1643,26 +1650,73 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
#if INCLUDE_JVMCI
if (MethodProfileWidth == 0) {
update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
}
#else
update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
bind (profile_continue);
#endif
bind(profile_continue);
}
}
void InterpreterMacroAssembler::record_klass_in_profile_helper(
Register receiver, Register scratch,
int start_row, Label& done, bool is_virtual_call) {
#if INCLUDE_JVMCI
void InterpreterMacroAssembler::profile_called_method(Register method, Register scratch) {
assert_different_registers(method, scratch);
if (ProfileInterpreter && MethodProfileWidth > 0) {
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(profile_continue);
Label done;
record_item_in_profile_helper(method, scratch, 0, done, MethodProfileWidth,
&VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
bind(done);
update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
bind(profile_continue);
}
}
#endif // INCLUDE_JVMCI
void InterpreterMacroAssembler::record_klass_in_profile_helper(Register receiver, Register scratch,
Label& done, bool is_virtual_call) {
if (TypeProfileWidth == 0) {
if (is_virtual_call) {
increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
}
return;
}
#if INCLUDE_JVMCI
else if (EnableJVMCI) {
increment_mdp_data_at(in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()), scratch);
}
#endif
} else {
int non_profiled_offset = -1;
if (is_virtual_call) {
non_profiled_offset = in_bytes(CounterData::count_offset());
}
#if INCLUDE_JVMCI
else if (EnableJVMCI) {
non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
}
#endif
int last_row = VirtualCallData::row_limit() - 1;
record_item_in_profile_helper(receiver, scratch, 0, done, TypeProfileWidth,
&VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
}
}
void InterpreterMacroAssembler::record_item_in_profile_helper(Register item,
Register scratch, int start_row, Label& done, int total_rows,
OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
int non_profiled_offset) {
int last_row = total_rows - 1;
assert(start_row <= last_row, "must be work left to do");
// Test this row for both the receiver and for null.
// Test this row for both the item and for null.
// Take any of three different outcomes:
// 1. found receiver => increment count and goto done
// 1. found item => increment count and goto done
// 2. found null => keep looking for case 1, maybe allocate this cell
// 3. found something else => keep looking for cases 1 and 2
// Case 3 is handled by a recursive call.
@ -1670,28 +1724,28 @@ void InterpreterMacroAssembler::record_klass_in_profile_helper(
Label next_test;
bool test_for_null_also = (row == start_row);
// See if the receiver is receiver[n].
int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
test_mdp_data_at(recvr_offset, receiver, next_test, scratch);
// See if the item is item[n].
int item_offset = in_bytes(item_offset_fn(row));
test_mdp_data_at(item_offset, item, next_test, scratch);
// delayed()->tst(scratch);
// The receiver is receiver[n]. Increment count[n].
int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
// The receiver is item[n]. Increment count[n].
int count_offset = in_bytes(item_count_offset_fn(row));
increment_mdp_data_at(count_offset, scratch);
ba_short(done);
bind(next_test);
if (test_for_null_also) {
Label found_null;
// Failed the equality check on receiver[n]... Test for null.
// Failed the equality check on item[n]... Test for null.
if (start_row == last_row) {
// The only thing left to do is handle the null case.
if (is_virtual_call) {
if (non_profiled_offset >= 0) {
brx(Assembler::zero, false, Assembler::pn, found_null);
delayed()->nop();
// Receiver did not match any saved receiver and there is no empty row for it.
// Item did not match any saved item and there is no empty row for it.
// Increment total counter to indicate polymorphic case.
increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
increment_mdp_data_at(non_profiled_offset, scratch);
ba_short(done);
bind(found_null);
} else {
@ -1705,21 +1759,22 @@ void InterpreterMacroAssembler::record_klass_in_profile_helper(
delayed()->nop();
// Put all the "Case 3" tests here.
record_klass_in_profile_helper(receiver, scratch, start_row + 1, done, is_virtual_call);
record_item_in_profile_helper(item, scratch, start_row + 1, done, total_rows,
item_offset_fn, item_count_offset_fn, non_profiled_offset);
// Found a null. Keep searching for a matching receiver,
// Found a null. Keep searching for a matching item,
// but remember that this is an empty (unused) slot.
bind(found_null);
}
}
// In the fall-through case, we found no matching receiver, but we
// observed the receiver[start_row] is NULL.
// In the fall-through case, we found no matching item, but we
// observed the item[start_row] is NULL.
// Fill in the receiver field and increment the count.
int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
set_mdp_data_at(recvr_offset, receiver);
int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
// Fill in the item field and increment the count.
int item_offset = in_bytes(item_offset_fn(start_row));
set_mdp_data_at(item_offset, item);
int count_offset = in_bytes(item_count_offset_fn(start_row));
mov(DataLayout::counter_increment, scratch);
set_mdp_data_at(count_offset, scratch);
if (start_row > 0) {
@ -1732,7 +1787,7 @@ void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
assert(ProfileInterpreter, "must be profiling");
Label done;
record_klass_in_profile_helper(receiver, scratch, 0, done, is_virtual_call);
record_klass_in_profile_helper(receiver, scratch, done, is_virtual_call);
bind (done);
}
@ -1788,7 +1843,7 @@ void InterpreterMacroAssembler::profile_null_seen(Register scratch) {
// The method data pointer needs to be updated.
int mdp_delta = in_bytes(BitData::bit_data_size());
if (TypeProfileCasts) {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
}
update_mdp_by_constant(mdp_delta);
@ -1806,7 +1861,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register klass,
int mdp_delta = in_bytes(BitData::bit_data_size());
if (TypeProfileCasts) {
mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
// Record the object type.
record_klass_in_profile(klass, scratch, false);
@ -1828,7 +1883,7 @@ void InterpreterMacroAssembler::profile_typecheck_failed(Register scratch) {
int count_offset = in_bytes(CounterData::count_offset());
// Back up the address, since we have already bumped the mdp.
count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
count_offset -= in_bytes(ReceiverTypeData::receiver_type_data_size());
// *Decrement* the counter. We expect to see zero or small negatives.
increment_mdp_data_at(count_offset, scratch, true);

View File

@ -30,6 +30,8 @@
// This file specializes the assembler with interpreter-specific macros
typedef ByteSize (*OffsetFunction)(uint);
REGISTER_DECLARATION( Register, Otos_i , O0); // tos for ints, etc
REGISTER_DECLARATION( Register, Otos_l , O0); // for longs
REGISTER_DECLARATION( Register, Otos_l1, O0); // for 1st part of longs
@ -80,6 +82,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
InterpreterMacroAssembler(CodeBuffer* c)
: MacroAssembler(c) {}
void jump_to_entry(address entry);
#ifndef CC_INTERP
virtual void load_earlyret_value(TosState state);
@ -299,7 +303,11 @@ class InterpreterMacroAssembler: public MacroAssembler {
void record_klass_in_profile(Register receiver, Register scratch, bool is_virtual_call);
void record_klass_in_profile_helper(Register receiver, Register scratch,
int start_row, Label& done, bool is_virtual_call);
Label& done, bool is_virtual_call);
void record_item_in_profile_helper(Register item,
Register scratch, int start_row, Label& done, int total_rows,
OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
int non_profiled_offset);
void update_mdp_by_offset(int offset_of_disp, Register scratch);
void update_mdp_by_offset(Register reg, int offset_of_disp,
@ -312,6 +320,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_call(Register scratch);
void profile_final_call(Register scratch);
void profile_virtual_call(Register receiver, Register scratch, bool receiver_can_be_null = false);
void profile_called_method(Register method, Register scratch) NOT_JVMCI_RETURN;
void profile_ret(TosState state, Register return_bci, Register scratch);
void profile_null_seen(Register scratch);
void profile_typecheck(Register klass, Register scratch);

View File

@ -34,11 +34,9 @@
address generate_abstract_entry(void);
// there are no math intrinsics on sparc
address generate_math_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address generate_jump_to_normal_entry(void);
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_accessor_entry(void) { return NULL; }
address generate_empty_entry(void) { return NULL; }
address generate_Reference_get_entry(void);
void lock_method(void);
void save_native_result(void);
void restore_native_result(void);
@ -48,4 +46,5 @@
// Not supported
address generate_CRC32_update_entry() { return NULL; }
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
#endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP

View File

@ -241,15 +241,6 @@ void InterpreterGenerator::generate_counter_overflow(Label& Lcontinue) {
// Various method entries
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry = __ pc();
assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
AddressLiteral al(Interpreter::entry_for_kind(Interpreter::zerolocals));
__ jump_to(al, G3_scratch);
__ delayed()->nop();
return entry;
}
// Abstract method entry
// Attempt to execute abstract method. Throw exception
//

View File

@ -0,0 +1,186 @@
/*
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_sparc.inline.hpp"
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
if (inst->is_call() || inst->is_jump()) {
return pc_offset + NativeCall::instruction_size;
} else if (inst->is_call_reg()) {
return pc_offset + NativeCallReg::instruction_size;
} else if (inst->is_sethi()) {
return pc_offset + NativeFarCall::instruction_size;
} else {
fatal("unsupported type of instruction for call site");
return 0;
}
}
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
address pc = _instructions->start() + pc_offset;
Handle obj = HotSpotObjectConstantImpl::object(constant);
jobject value = JNIHandles::make_local(obj());
if (HotSpotObjectConstantImpl::compressed(constant)) {
#ifdef _LP64
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec, 1);
#else
fatal("compressed oop on 32bit");
#endif
} else {
NativeMovConstReg* move = nativeMovConstReg_at(pc);
move->set_data((intptr_t) value);
// We need two relocations: one on the sethi and one on the add.
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc + NativeMovConstReg::sethi_offset, rspec);
_instructions->relocate(pc + NativeMovConstReg::add_offset, rspec);
}
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
address pc = _instructions->start() + pc_offset;
NativeInstruction* inst = nativeInstruction_at(pc);
NativeInstruction* inst1 = nativeInstruction_at(pc + 4);
if(inst->is_sethi() && inst1->is_nop()) {
address const_start = _constants->start();
address dest = _constants->start() + data_offset;
if(_constants_size > 0) {
_instructions->relocate(pc + NativeMovConstReg::sethi_offset, internal_word_Relocation::spec((address) dest));
_instructions->relocate(pc + NativeMovConstReg::add_offset, internal_word_Relocation::spec((address) dest));
}
TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
}else {
int const_size = align_size_up(_constants->end()-_constants->start(), CodeEntryAlignment);
NativeMovRegMem* load = nativeMovRegMem_at(pc);
// This offset must match with SPARCLoadConstantTableBaseOp.emitCode
load->set_offset(- (const_size - data_offset + Assembler::min_simm13()));
TRACE_jvmci_3("relocating ld at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
}
}
void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
fatal("CodeInstaller::pd_relocate_CodeBlob - sparc unimp");
}
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
address pc = (address) inst;
if (inst->is_call()) {
NativeCall* call = nativeCall_at(pc);
call->set_destination((address) foreign_call_destination);
_instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec());
} else if (inst->is_sethi()) {
NativeJump* jump = nativeJump_at(pc);
jump->set_jump_destination((address) foreign_call_destination);
_instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
} else {
fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)));
}
TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
}
void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
#ifdef ASSERT
Method* method = NULL;
// we need to check, this might also be an unresolved method
if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
method = getMethodFromHotSpotMethod(hotspot_method);
}
#endif
switch (_next_call_type) {
case INLINE_INVOKE:
break;
case INVOKEVIRTUAL:
case INVOKEINTERFACE: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
_instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc));
break;
}
case INVOKESTATIC: {
assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_static_call_stub());
_instructions->relocate(call->instruction_address(), relocInfo::static_call_type);
break;
}
case INVOKESPECIAL: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
_instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type);
break;
}
default:
fatal("invalid _next_call_type value");
break;
}
}
void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
switch (mark) {
case POLL_NEAR:
fatal("unimplemented");
break;
case POLL_FAR:
_instructions->relocate(pc, relocInfo::poll_type);
break;
case POLL_RETURN_NEAR:
fatal("unimplemented");
break;
case POLL_RETURN_FAR:
_instructions->relocate(pc, relocInfo::poll_return_type);
break;
default:
fatal("invalid mark value");
break;
}
}
// convert JVMCI register indices (as used in oop maps) to HotSpot registers
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
if (jvmci_reg < RegisterImpl::number_of_registers) {
return as_Register(jvmci_reg)->as_VMReg();
} else {
jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
floatRegisterNumber += MAX2(0, floatRegisterNumber-32); // Beginning with f32, only every second register is going to be addressed
if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
return as_FloatRegister(floatRegisterNumber)->as_VMReg();
}
ShouldNotReachHere();
return NULL;
}
}
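A quick worked example of the mapping above, assuming the usual 32 SPARC integer registers come first: a JVMCI index below 32 maps directly to the integer register of the same number, while an index of 72 gives floatRegisterNumber = 72 - 32 = 40, which the MAX2 adjustment bumps to 40 + (40 - 32) = 48, i.e. %f48, since beyond %f31 only the even-numbered (double-precision) float registers are addressable on SPARC V9.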
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
return !hotspotRegister->is_FloatRegister();
}

View File

@ -1596,7 +1596,7 @@ void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) {
else {
::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
assert(false, err_msg("DEBUG MESSAGE: %s", msg));
assert(false, "DEBUG MESSAGE: %s", msg);
}

View File

@ -61,8 +61,8 @@ inline void fill_subword(void* start, void* end, int value) {
" sub %[offset], %[end], %[offset]\n\t" // offset := start - end
" sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
" add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
" rd %pc, %[pc]\n\t" // dispatch on scaled offset
" jmpl %[pc]+%[offset], %g0\n\t"
" rd %%pc, %[pc]\n\t" // dispatch on scaled offset
" jmpl %[pc]+%[offset], %%g0\n\t"
" nop\n\t"
// DISPATCH: no direct reference, but without it the store block may be elided.
"1:\n\t"
@ -108,7 +108,7 @@ void memset_with_concurrent_readers(void* to, int value, size_t size) {
// Unroll loop x8.
" sub %[aend], %[ato], %[temp]\n\t"
" cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
" ba %xcc, 2f\n\t" // goto TEST always
" ba %%xcc, 2f\n\t" // goto TEST always
" sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
// LOOP:
"1:\n\t" // unrolled x8 store loop top
@ -123,7 +123,7 @@ void memset_with_concurrent_readers(void* to, int value, size_t size) {
" stx %[xvalue], [%[ato]-8]\n\t"
// TEST:
"2:\n\t"
" bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
" bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
" add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
// Fill remaining < 8 full words.
// Dispatch on (aligned_end - aligned_to).
@ -132,8 +132,8 @@ void memset_with_concurrent_readers(void* to, int value, size_t size) {
" sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
" srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
" add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
" rd %pc, %[temp]\n\t" // dispatch on scaled offset
" jmpl %[temp]+%[ato], %g0\n\t"
" rd %%pc, %[temp]\n\t" // dispatch on scaled offset
" jmpl %[temp]+%[ato], %%g0\n\t"
" nop\n\t"
// DISPATCH: no direct reference, but without it the store block may be elided.
"3:\n\t"

View File

@ -56,7 +56,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_
#ifdef ASSERT
static int check_nonzero(const char* xname, int x) {
assert(x != 0, err_msg("%s should be nonzero", xname));
assert(x != 0, "%s should be nonzero", xname);
return x;
}
#define NONZERO(x) check_nonzero(#x, x)
@ -453,7 +453,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
}
default:
fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
break;
}

View File

@ -53,6 +53,7 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
bool is_nop() { return long_at(0) == nop_instruction(); }
bool is_call() { return is_op(long_at(0), Assembler::call_op); }
bool is_call_reg() { return is_op(long_at(0), Assembler::arith_op); }
bool is_sethi() { return (is_op2(long_at(0), Assembler::sethi_op2)
&& inv_rd(long_at(0)) != G0); }
@ -415,6 +416,19 @@ inline NativeCall* nativeCall_at(address instr) {
return call;
}
class NativeCallReg: public NativeInstruction {
public:
enum Sparc_specific_constants {
instruction_size = 8,
return_address_offset = 8,
instruction_offset = 0
};
address next_instruction_address() const {
return addr_at(instruction_size);
}
};
// The NativeFarCall is an abstraction for accessing/manipulating native call-anywhere
// instructions in the sparcv9 vm. Used to call native methods which may be loaded
// anywhere in the address space, possibly out of reach of a call instruction.

View File

@ -197,8 +197,5 @@ address Relocation::pd_get_address_from_code() {
void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
}
void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
}
void metadata_Relocation::pd_fix_value(address x) {
}

View File

@ -43,6 +43,9 @@
#include "compiler/compileBroker.hpp"
#include "shark/sharkCompiler.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif
#define __ masm->
@ -316,7 +319,7 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
// 8 bytes FP registers are saved by default on SPARC.
bool SharedRuntime::is_wide_vector(int size) {
// Note, MaxVectorSize == 8 on SPARC.
assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
assert(size <= 8, "%d bytes vectors are not supported", size);
return size > 8;
}
@ -464,7 +467,7 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
break;
default:
fatal(err_msg_res("unknown basic type %d", sig_bt[i]));
fatal("unknown basic type %d", sig_bt[i]);
break;
}
}
@ -513,10 +516,10 @@ class AdapterGenerator {
const VMRegPair *regs,
Label& skip_fixup);
void gen_i2c_adapter(int total_args_passed,
// VMReg max_arg,
int comp_args_on_stack, // VMRegStackSlots
const BasicType *sig_bt,
const VMRegPair *regs);
// VMReg max_arg,
int comp_args_on_stack, // VMRegStackSlots
const BasicType *sig_bt,
const VMRegPair *regs);
AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
@ -760,13 +763,11 @@ static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg
__ bind(L_fail);
}
void AdapterGenerator::gen_i2c_adapter(
int total_args_passed,
// VMReg max_arg,
int comp_args_on_stack, // VMRegStackSlots
const BasicType *sig_bt,
const VMRegPair *regs) {
void AdapterGenerator::gen_i2c_adapter(int total_args_passed,
// VMReg max_arg,
int comp_args_on_stack, // VMRegStackSlots
const BasicType *sig_bt,
const VMRegPair *regs) {
// Generate an I2C adapter: adjust the I-frame to make space for the C-frame
// layout. Lesp was saved by the calling I-frame and will be restored on
// return. Meanwhile, outgoing arg space is all owned by the callee
@ -990,6 +991,21 @@ void AdapterGenerator::gen_i2c_adapter(
// Jump to the compiled code just as if compiled code was doing it.
__ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
#if INCLUDE_JVMCI
if (EnableJVMCI) {
// check if this call should be routed towards a specific entry point
__ ld(Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), G1);
__ cmp(G0, G1);
Label no_alternative_target;
__ br(Assembler::equal, false, Assembler::pn, no_alternative_target);
__ delayed()->nop();
__ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()), G3);
__ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
__ bind(no_alternative_target);
}
#endif // INCLUDE_JVMCI
// 6243940 We might end up in handle_wrong_method if
// the callee is deoptimized as we race thru here. If that
@ -1006,6 +1022,15 @@ void AdapterGenerator::gen_i2c_adapter(
__ delayed()->nop();
}
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
int total_args_passed,
int comp_args_on_stack,
const BasicType *sig_bt,
const VMRegPair *regs) {
AdapterGenerator agen(masm);
agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
}
// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
int total_args_passed,
@ -1016,9 +1041,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
AdapterFingerPrint* fingerprint) {
address i2c_entry = __ pc();
AdapterGenerator agen(masm);
agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
// -------------------------------------------------------------------------
@ -1063,7 +1086,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
}
address c2i_entry = __ pc();
AdapterGenerator agen(masm);
agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);
__ flush();
@ -1859,7 +1882,7 @@ static void gen_special_dispatch(MacroAssembler* masm,
} else if (iid == vmIntrinsics::_invokeBasic) {
has_receiver = true;
} else {
fatal(err_msg_res("unexpected intrinsic id %d", iid));
fatal("unexpected intrinsic id %d", iid);
}
if (member_reg != noreg) {
@ -2916,6 +2939,11 @@ void SharedRuntime::generate_deopt_blob() {
pad += StackShadowPages*16 + 32;
}
#endif
#if INCLUDE_JVMCI
if (EnableJVMCI) {
pad += 1000; // Increase the buffer size when compiling for JVMCI
}
#endif
#ifdef _LP64
CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
@ -2982,6 +3010,45 @@ void SharedRuntime::generate_deopt_blob() {
__ ba(cont);
__ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
#if INCLUDE_JVMCI
Label after_fetch_unroll_info_call;
int implicit_exception_uncommon_trap_offset = 0;
int uncommon_trap_offset = 0;
if (EnableJVMCI) {
masm->block_comment("BEGIN implicit_exception_uncommon_trap");
implicit_exception_uncommon_trap_offset = __ offset() - start;
__ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()), O7);
__ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
__ add(O7, -8, O7);
uncommon_trap_offset = __ offset() - start;
// Save everything in sight.
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
__ set_last_Java_frame(SP, NULL);
__ ld(G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()), O1);
__ sub(G0, 1, L1);
__ st(L1, G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()));
__ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
__ mov(G2_thread, O0);
__ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
__ delayed()->nop();
oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
__ get_thread();
__ add(O7, 8, O7);
__ reset_last_Java_frame();
__ ba(after_fetch_unroll_info_call);
__ delayed()->nop(); // Delay slot
masm->block_comment("END implicit_exception_uncommon_trap");
} // EnableJVMCI
#endif // INCLUDE_JVMCI
int exception_offset = __ offset() - start;
// restore G2, the trampoline destroyed it
@ -3004,6 +3071,7 @@ void SharedRuntime::generate_deopt_blob() {
int exception_in_tls_offset = __ offset() - start;
// No need to update oop_map as each call to save_live_registers will produce identical oopmap
// Opens a new stack frame
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
// Restore G2_thread
@ -3035,7 +3103,12 @@ void SharedRuntime::generate_deopt_blob() {
// Reexecute entry, similar to c2 uncommon trap
//
int reexecute_offset = __ offset() - start;
#if INCLUDE_JVMCI && !defined(COMPILER1)
if (EnableJVMCI && UseJVMCICompiler) {
// JVMCI does not use this kind of deoptimization
__ should_not_reach_here();
}
#endif
// No need to update oop_map as each call to save_live_registers will produce identical oopmap
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
@ -3059,6 +3132,11 @@ void SharedRuntime::generate_deopt_blob() {
__ reset_last_Java_frame();
#if INCLUDE_JVMCI
if (EnableJVMCI) {
__ bind(after_fetch_unroll_info_call);
}
#endif
// NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
// so this move will survive
@ -3124,6 +3202,12 @@ void SharedRuntime::generate_deopt_blob() {
masm->flush();
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
#if INCLUDE_JVMCI
if (EnableJVMCI) {
_deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
_deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
}
#endif
}
#ifdef COMPILER2

View File

@ -1098,7 +1098,7 @@ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
Register r = as_Register(ra_->get_encode(this));
CodeSection* consts_section = __ code()->consts();
int consts_size = consts_section->align_at_start(consts_section->size());
assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size));
assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);
if (UseRDPCForConstantTableBase) {
// For the following RDPC logic to work correctly the consts
@ -1860,6 +1860,10 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
int Matcher::regnum_to_fpu_offset(int regnum) {
return regnum - 32; // The FP registers are in the second chunk
}

View File

@ -204,6 +204,20 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
address entry = __ pc();
__ get_constant_pool_cache(LcpoolCache); // load LcpoolCache
#if INCLUDE_JVMCI
// Check if we need to take lock at entry of synchronized method.
if (UseJVMCICompiler) {
Label L;
Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset());
__ ldbool(pending_monitor_enter_addr, Gtemp); // Load if pending monitor enter
__ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L);
// Clear flag.
__ stbool(G0, pending_monitor_enter_addr);
// Take lock.
lock_method();
__ bind(L);
}
#endif
{ Label L;
Address exception_addr(G2_thread, Thread::pending_exception_offset());
__ ld_ptr(exception_addr, Gtemp); // Load pending exception.
@ -349,7 +363,7 @@ void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile
// Allocate monitor and lock method (asm interpreter)
// ebx - Method*
//
void InterpreterGenerator::lock_method(void) {
void TemplateInterpreterGenerator::lock_method() {
__ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0); // Load access flags.
#ifdef ASSERT
@ -779,14 +793,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// Generate regular method entry
__ bind(slow_path);
(void) generate_normal_entry(false);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry;
}
#endif // INCLUDE_ALL_GCS
// If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor
return generate_jump_to_normal_entry();
return NULL;
}
//

View File

@ -37,9 +37,9 @@
#ifdef _LP64
// The sethi() instruction generates lots more instructions when shell
// stack limit is unlimited, so that's why this is much bigger.
const static int InterpreterCodeSize = 210 * K;
const static int InterpreterCodeSize = 260 * K;
#else
const static int InterpreterCodeSize = 180 * K;
const static int InterpreterCodeSize = 230 * K;
#endif
#endif // CPU_SPARC_VM_TEMPLATEINTERPRETER_SPARC_HPP

View File

@ -2949,12 +2949,14 @@ void TemplateTable::prepare_invoke(int byte_no,
void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
Register Rtemp = G4_scratch;
Register Rcall = Rindex;
assert_different_registers(Rcall, G5_method, Gargs, Rret);
// get target Method* & entry point
__ lookup_virtual_method(Rrecv, Rindex, G5_method);
__ profile_arguments_type(G5_method, Rcall, Gargs, true);
__ profile_called_method(G5_method, Rtemp);
__ call_from_interpreter(Rcall, Gargs, Rret);
}
@ -3211,6 +3213,7 @@ void TemplateTable::invokeinterface(int byte_no) {
assert_different_registers(Rcall, G5_method, Gargs, Rret);
__ profile_arguments_type(G5_method, Rcall, Gargs, true);
__ profile_called_method(G5_method, Rscratch);
__ call_from_interpreter(Rcall, Gargs, Rret);
}
@ -3486,7 +3489,8 @@ void TemplateTable::checkcast() {
Register RspecifiedKlass = O4;
// Check for casting a NULL
__ br_null_short(Otos_i, Assembler::pn, is_null);
__ br_null(Otos_i, false, Assembler::pn, is_null);
__ delayed()->nop();
// Get value klass in RobjKlass
__ load_klass(Otos_i, RobjKlass); // get value klass
@ -3542,7 +3546,8 @@ void TemplateTable::instanceof() {
Register RspecifiedKlass = O4;
// Check for casting a NULL
__ br_null_short(Otos_i, Assembler::pt, is_null);
__ br_null(Otos_i, false, Assembler::pt, is_null);
__ delayed()->nop();
// Get value klass in RobjKlass
__ load_klass(Otos_i, RobjKlass); // get value klass

View File

@ -37,10 +37,11 @@
/******************************/ \
/* JavaFrameAnchor */ \
/******************************/ \
volatile_nonstatic_field(JavaFrameAnchor, _flags, int)
#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
volatile_nonstatic_field(JavaFrameAnchor, _flags, int) \
static_field(VM_Version, _features, int)
#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
declare_toplevel_type(VM_Version)
#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
/******************************/ \
@ -78,7 +79,11 @@
declare_c2_constant(R_G4_num) \
declare_c2_constant(R_G5_num) \
declare_c2_constant(R_G6_num) \
declare_c2_constant(R_G7_num)
declare_c2_constant(R_G7_num) \
declare_constant(VM_Version::vis1_instructions_m) \
declare_constant(VM_Version::vis2_instructions_m) \
declare_constant(VM_Version::vis3_instructions_m) \
declare_constant(VM_Version::cbcond_instructions_m)
#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)

View File

@ -40,10 +40,6 @@ void VM_Version::initialize() {
PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
PrefetchFieldsAhead = prefetch_fields_ahead();
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value");
if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0;
// Allocation prefetch settings
intx cache_line_size = prefetch_data_size();
if( cache_line_size > AllocatePrefetchStepSize )
@ -59,13 +55,6 @@ void VM_Version::initialize() {
AllocatePrefetchDistance = allocate_prefetch_distance();
AllocatePrefetchStyle = allocate_prefetch_style();
assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 &&
(AllocatePrefetchDistance > 0), "invalid value");
if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 ||
(AllocatePrefetchDistance <= 0)) {
AllocatePrefetchDistance = AllocatePrefetchStepSize;
}
if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
warning("BIS instructions are not available on this CPU");
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
@ -73,13 +62,6 @@ void VM_Version::initialize() {
guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
assert(ArraycopySrcPrefetchDistance < 4096, "invalid value");
if (ArraycopySrcPrefetchDistance >= 4096)
ArraycopySrcPrefetchDistance = 4064;
assert(ArraycopyDstPrefetchDistance < 4096, "invalid value");
if (ArraycopyDstPrefetchDistance >= 4096)
ArraycopyDstPrefetchDistance = 4064;
UseSSE = 0; // Only on x86 and x64
_supports_cx8 = has_v9();

View File

@ -29,6 +29,7 @@
#include "runtime/vm_version.hpp"
class VM_Version: public Abstract_VM_Version {
friend class VMStructs;
protected:
enum Feature_Flag {
v8_instructions = 0,

View File

@ -733,11 +733,11 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
// these asserts are somewhat nonsensical
#ifndef _LP64
assert(which == imm_operand || which == disp32_operand,
err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
"which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
assert((which == call32_operand || which == imm_operand) && is_64bit ||
which == narrow_oop_operand && !is_64bit,
err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
"which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
return ip;
@ -770,6 +770,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x55: // andnps
case 0x56: // orps
case 0x57: // xorps
case 0x59: //mulpd
case 0x6E: // movd
case 0x7E: // movd
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@ -877,21 +878,35 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
// Check second byte
NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
int vex_opcode;
// First byte
if ((0xFF & *inst) == VEX_3bytes) {
vex_opcode = VEX_OPCODE_MASK & *ip;
ip++; // third byte
is_64bit = ((VEX_W & *ip) == VEX_W);
} else {
vex_opcode = VEX_OPCODE_0F;
}
ip++; // opcode
// To find the end of instruction (which == end_pc_operand).
switch (0xFF & *ip) {
case 0x61: // pcmpestri r, r/a, #8
case 0x70: // pshufd r, r/a, #8
case 0x73: // psrldq r, #8
tail_size = 1; // the imm8
break;
default:
break;
switch (vex_opcode) {
case VEX_OPCODE_0F:
switch (0xFF & *ip) {
case 0x70: // pshufd r, r/a, #8
case 0x71: // ps[rl|ra|ll]w r, #8
case 0x72: // ps[rl|ra|ll]d r, #8
case 0x73: // ps[rl|ra|ll]q r, #8
case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
case 0xC4: // pinsrw r, r, r/a, #8
case 0xC5: // pextrw r/a, r, #8
case 0xC6: // shufp[s|d] r, r, r/a, #8
tail_size = 1; // the imm8
break;
}
break;
case VEX_OPCODE_0F_3A:
tail_size = 1;
break;
}
ip++; // skip opcode
debug_only(has_disp32 = true); // has both kinds of operands!
@ -1604,6 +1619,85 @@ void Assembler::cpuid() {
emit_int8((unsigned char)0xA2);
}
// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented
// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v
// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. -
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. -
//
// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v
//
// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v
//
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
assert(VM_Version::supports_sse4_2(), "");
int8_t w = 0x01;
Prefix p = Prefix_EMPTY;
emit_int8((int8_t)0xF2);
switch (sizeInBytes) {
case 1:
w = 0;
break;
case 2:
case 4:
break;
LP64_ONLY(case 8:)
// This instruction is not valid in 32 bits
// Note:
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//
// Page B - 72 Vol. 2C says
// qwreg2 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
// mem64 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
// F0!!!
// while 3 - 208 Vol. 2A
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E.Accumulate CRC32 on r / m64.
//
// the 0 on a last bit is reserved for a different flavor of this instruction :
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E.Accumulate CRC32 on r / m8.
p = REX_W;
break;
default:
assert(0, "Unsupported value for a sizeInBytes argument");
break;
}
LP64_ONLY(prefix(crc, v, p);)
emit_int8((int8_t)0x0F);
emit_int8(0x38);
emit_int8((int8_t)(0xF0 | w));
emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}
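For reference, a small standalone sketch (not part of this patch) of the byte sequence the register form above produces; the registers and operand size are chosen arbitrarily, using the usual x86 register encodings:

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t crc_enc = 0;  // eax, the accumulating operand
  const uint8_t v_enc   = 1;  // ecx, the source operand
  const uint8_t w       = 1;  // 0 selects the r/m8 form (F0), 1 the r/m16 and r/m32 form (F1)
  const uint8_t bytes[] = {
    0xF2, 0x0F, 0x38,
    (uint8_t)(0xF0 | w),                                   // opcode byte: F1
    (uint8_t)(0xC0 | ((crc_enc & 7) << 3) | (v_enc & 7))   // ModRM byte: C1
  };
  for (uint8_t b : bytes) printf("%02X ", b);              // F2 0F 38 F1 C1 == crc32 eax, ecx
  printf("\n");
  return 0;
}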
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
int8_t w = 0x01;
Prefix p = Prefix_EMPTY;
emit_int8((int8_t)0xF2);
switch (sizeInBytes) {
case 1:
w = 0;
break;
case 2:
case 4:
break;
LP64_ONLY(case 8:)
// This instruction is not valid in 32 bits
p = REX_W;
break;
default:
assert(0, "Unsupported value for a sizeInBytes argument");
break;
}
LP64_ONLY(prefix(crc, adr, p);)
emit_int8((int8_t)0x0F);
emit_int8(0x38);
emit_int8((int8_t)(0xF0 | w));
emit_operand(crc, adr);
}
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
@ -2399,7 +2493,7 @@ void Assembler::movsbl(Register dst, Address src) { // movsxb
void Assembler::movsbl(Register dst, Register src) { // movsxb
NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
emit_int8(0x0F);
emit_int8((unsigned char)0xBE);
emit_int8((unsigned char)(0xC0 | encode));
@ -2516,7 +2610,7 @@ void Assembler::movzbl(Register dst, Address src) { // movzxb
void Assembler::movzbl(Register dst, Register src) { // movzxb
NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
emit_int8(0x0F);
emit_int8((unsigned char)0xB6);
emit_int8(0xC0 | encode);
@ -2951,6 +3045,15 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
emit_int8(imm8);
}
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse2(), "");
int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
emit_int8((unsigned char)0xC5);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
@ -2969,6 +3072,15 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
emit_int8(imm8);
}
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse2(), "");
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
emit_int8((unsigned char)0xC4);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
if (VM_Version::supports_evex()) {
@ -3984,6 +4096,16 @@ void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
}
}
void Assembler::mulpd(XMMRegister dst, Address src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_evex()) {
emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
} else {
emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
}
}
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@ -4172,6 +4294,26 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
}
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_evex()) {
emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
} else {
emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
}
}
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_evex()) {
emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
} else {
emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
}
}
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_avx512dq()) {
@ -4792,8 +4934,9 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int
}
// AND packed integers
// logical operations packed integers
void Assembler::pand(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}
@ -4814,6 +4957,17 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
}
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_evex()) {
emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
}
else {
emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
}
}
void Assembler::por(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@ -6223,6 +6377,14 @@ void Assembler::shldl(Register dst, Register src) {
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
// 0F A4 / r ib
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
emit_int8(0x0F);
emit_int8((unsigned char)0xA4);
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
emit_int8(imm8);
}
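As a concrete, purely illustrative instance of the imm8 form above: with dst = eax (encoding 0), src = ebx (encoding 3) and imm8 = 4, the ModRM byte is 0xC0 | (3 << 3) | 0 = 0xD8, so the emitted sequence is 0F A4 D8 04, i.e. shld eax, ebx, 4.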
void Assembler::shrdl(Register dst, Register src) {
emit_int8(0x0F);
emit_int8((unsigned char)0xAD);
@ -6362,12 +6524,12 @@ int Assembler::prefixq_and_encode(int reg_enc) {
return reg_enc;
}
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
if (dst_enc < 8) {
if (src_enc >= 8) {
prefix(REX_B);
src_enc -= 8;
} else if (byteinst && src_enc >= 4) {
} else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
prefix(REX);
}
} else {
@ -6408,6 +6570,40 @@ void Assembler::prefix(Register reg) {
}
}
void Assembler::prefix(Register dst, Register src, Prefix p) {
if (src->encoding() >= 8) {
p = (Prefix)(p | REX_B);
}
if (dst->encoding() >= 8) {
p = (Prefix)( p | REX_R);
}
if (p != Prefix_EMPTY) {
// do not generate an empty prefix
prefix(p);
}
}
void Assembler::prefix(Register dst, Address adr, Prefix p) {
if (adr.base_needs_rex()) {
if (adr.index_needs_rex()) {
assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
} else {
prefix(REX_B);
}
} else {
if (adr.index_needs_rex()) {
assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
}
}
if (dst->encoding() >= 8) {
p = (Prefix)(p | REX_R);
}
if (p != Prefix_EMPTY) {
// do not generate an empty prefix
prefix(p);
}
}
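A minimal standalone sketch of how the REX bits above combine; the enum values mirror the standard x86-64 REX layout (0100WRXB) and the concrete registers are only an illustration:

#include <cstdio>

enum Prefix {            // same bit layout as the x86-64 REX prefix: 0100WRXB
  Prefix_EMPTY = 0x0,
  REX          = 0x40,
  REX_B        = 0x41,   // extends the ModRM r/m (or base) field
  REX_R        = 0x44,   // extends the ModRM reg field
  REX_W        = 0x48    // 64-bit operand size
};

int main() {
  // e.g. a 64-bit instruction with dst = r9 (encoding 9) and src = rbx (encoding 3)
  int dst_enc = 9, src_enc = 3;
  int p = REX_W;
  if (src_enc >= 8) p |= REX_B;
  if (dst_enc >= 8) p |= REX_R;
  if (p != Prefix_EMPTY) printf("REX byte: %02X\n", p);   // prints 4C
  return 0;
}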
void Assembler::prefix(Address adr) {
if (adr.base_needs_rex()) {
if (adr.index_needs_rex()) {

View File

@ -506,7 +506,8 @@ class Assembler : public AbstractAssembler {
VEX_3bytes = 0xC4,
VEX_2bytes = 0xC5,
EVEX_4bytes = 0x62
EVEX_4bytes = 0x62,
Prefix_EMPTY = 0x0
};
enum VexPrefix {
@ -535,7 +536,8 @@ class Assembler : public AbstractAssembler {
VEX_OPCODE_NONE = 0x0,
VEX_OPCODE_0F = 0x1,
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3
VEX_OPCODE_0F_3A = 0x3,
VEX_OPCODE_MASK = 0x1F
};
enum AvxVectorLen {
@ -611,10 +613,15 @@ private:
int prefix_and_encode(int reg_enc, bool byteinst = false);
int prefixq_and_encode(int reg_enc);
int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
int prefix_and_encode(int dst_enc, int src_enc) {
return prefix_and_encode(dst_enc, false, src_enc, false);
}
int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
int prefixq_and_encode(int dst_enc, int src_enc);
void prefix(Register reg);
void prefix(Register dst, Register src, Prefix p);
void prefix(Register dst, Address adr, Prefix p);
void prefix(Address adr);
void prefixq(Address adr);
@ -1177,6 +1184,10 @@ private:
// Identify processor type and features
void cpuid();
// CRC32C
void crc32(Register crc, Register v, int8_t sizeInBytes);
void crc32(Register crc, Address adr, int8_t sizeInBytes);
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src);
@ -1672,10 +1683,14 @@ private:
// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
@ -1783,6 +1798,7 @@ private:
void setb(Condition cc, Register dst);
void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8);
void shll(Register dst, int imm8);
void shll(Register dst);
@ -1925,6 +1941,7 @@ private:
// Multiply Packed Floating-Point Values
void mulpd(XMMRegister dst, XMMRegister src);
void mulpd(XMMRegister dst, Address src);
void mulps(XMMRegister dst, XMMRegister src);
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -1951,6 +1968,9 @@ private:
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void unpckhpd(XMMRegister dst, XMMRegister src);
void unpcklpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
@ -2046,6 +2066,9 @@ private:
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
// Or packed integers
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

View File

@ -33,10 +33,12 @@
inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; }
inline int Assembler::prefixq_and_encode(int reg_enc) { return reg_enc; }
inline int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { return dst_enc << 3 | src_enc; }
inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) { return dst_enc << 3 | src_enc; }
inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; }
inline void Assembler::prefix(Register reg) {}
inline void Assembler::prefix(Register dst, Register src, Prefix p) {}
inline void Assembler::prefix(Register dst, Address adr, Prefix p) {}
inline void Assembler::prefix(Address adr) {}
inline void Assembler::prefixq(Address adr) {}

View File

@ -2457,9 +2457,6 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
case lir_exp :
__ exp_with_fallback(op->as_Op2()->fpu_stack_size());
break;
case lir_pow :
__ pow_with_fallback(op->as_Op2()->fpu_stack_size());
break;
@ -2684,7 +2681,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
#endif // _LP64
}
} else {
fatal(err_msg("unexpected type: %s", basictype_to_str(c->type())));
fatal("unexpected type: %s", basictype_to_str(c->type()));
}
// cpu register - address
} else if (opr2->is_address()) {

View File

@ -808,6 +808,12 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
if (x->id() == vmIntrinsics::_dexp) {
do_ExpIntrinsic(x);
return;
}
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
@ -818,7 +824,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dpow:
use_fpu = true;
}
@ -870,7 +875,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
default: ShouldNotReachHere();
}
@ -880,6 +884,32 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
}
}
void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
LIRItem value(x->argument_at(0), this);
value.set_destroys_register();
LIR_Opr calc_result = rlock_result(x);
LIR_Opr result_reg = result_register_for(x->type());
BasicTypeList signature(1);
signature.append(T_DOUBLE);
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
value.load_item_force(cc->at(0));
#ifndef _LP64
LIR_Opr tmp = FrameMap::fpu0_double_opr;
result_reg = tmp;
if (VM_Version::supports_sse2()) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
#else
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
#endif
__ move(result_reg, calc_result);
}
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
assert(x->number_of_arguments() == 5, "wrong type");

View File

@ -814,8 +814,7 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
case lir_tan:
case lir_sin:
case lir_cos:
case lir_exp: {
case lir_cos: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,

View File

@ -48,11 +48,11 @@ define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 6);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
#ifdef AMD64
define_pd_global(intx, INTPRESSURE, 13);
define_pd_global(intx, FLOATPRESSURE, 14);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
@ -64,6 +64,7 @@ define_pd_global(intx, CodeCacheExpansionSize, 64*K);
define_pd_global(uint64_t, MaxRAM, 128ULL*G);
#else
define_pd_global(intx, INTPRESSURE, 6);
define_pd_global(intx, FLOATPRESSURE, 6);
define_pd_global(intx, InteriorEntryAlignment, 4);
define_pd_global(size_t, NewSizeThreadIncrease, 4*K);
define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1
@ -82,6 +83,7 @@ define_pd_global(bool, OptoPeephole, true);
define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, true);
define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);

View File

@ -50,13 +50,15 @@ bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
// ----------------------------------------------------------------------------
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
// Stub is fixed up when the corresponding call is converted from
// calling compiled code to calling interpreted code.
// movq rbx, 0
// jmp -5 # to self
address mark = cbuf.insts_mark(); // Get mark within main instrs section.
if (mark == NULL) {
mark = cbuf.insts_mark(); // Get mark within main instrs section.
}
// Note that the code buffer's insts_mark is always relative to insts.
// That's why we must use the macroassembler to generate a stub.
@ -73,6 +75,8 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
// This is recognized as unresolved by relocs/nativeinst/ic code.
__ jump(RuntimeAddress(__ pc()));
assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size");
// Update current stubs pointer and restore insts_end.
__ end_a_stub();
return base;
@ -104,10 +108,15 @@ void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry)
NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
#ifdef ASSERT
// read the value once
intptr_t data = method_holder->data();
address destination = jump->jump_destination();
assert(data == 0 || data == (intptr_t)callee(),
"a) MT-unsafe modification of inline cache");
assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
assert(destination == (address)-1 || destination == entry,
"b) MT-unsafe modification of inline cache");
#endif
// Update stub.
method_holder->set_data((intptr_t)callee());
@ -124,11 +133,12 @@ void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub)
assert(stub != NULL, "stub not found");
// Creation also verifies the object.
NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
method_holder->set_data(0);
NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
jump->set_jump_destination((address)-1);
}
//-----------------------------------------------------------------------------
// Non-product mode code
#ifndef PRODUCT
@ -150,5 +160,4 @@ void CompiledStaticCall::verify() {
// Verify state.
assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
}
#endif // !PRODUCT

View File

@ -29,6 +29,7 @@
void generate_more_monitors();
void generate_deopt_handling();
void lock_method(void);
address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only
void generate_compute_interpreter_state(const Register state,
const Register prev_state,

View File

@ -741,7 +741,7 @@ void InterpreterGenerator::generate_stack_overflow_check(void) {
// Find preallocated monitor and lock method (C++ interpreter)
// rbx - Method*
//
void InterpreterGenerator::lock_method(void) {
void CppInterpreterGenerator::lock_method() {
// assumes state == rsi/r13 == pointer to current interpreterState
// minimally destroys rax, rdx|c_rarg1, rdi
//
@ -807,7 +807,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor
return generate_jump_to_normal_entry();
return NULL;
}
//

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
enum {
// S. Gueron / Information Processing Letters 112 (2012) 184
// shows that anything above 6K and below 32K is a good choice
// 32K does not deliver any further performance gains
// 6K=8*256 (*3 as we compute 3 blocks together)
//
// Thus selecting the smallest value so it could apply to the largest number
// of buffer sizes.
CRC32C_HIGH = 8 * 256,
// empirical
// based on ubench study using methodology described in
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
//
// arbitrary value between 27 and 256
CRC32C_MIDDLE = 8 * 86,
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
// shows that 240 and 1024 are equally good choices as the 216==8*27
//
// Selecting the smallest value which resulted in a significant performance improvement over
// the sequential version
CRC32C_LOW = 8 * 27,
CRC32C_NUM_ChunkSizeInBytes = 3,
// We need to compute powers of 64N and 128N for each "chunk" size
CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
};
// Notes:
// 1. Why do we need a "chunk" approach?
// The overhead of computing the powers (of 64N and 128N) for an arbitrary buffer of size N is significant
// (the implementation would approach plain library performance).
// 2. Why only 3 "chunks"?
// Results of performance experiments showed that HIGH+LOW alone did not deliver a stable speedup
// curve.
//
// Disclaimer:
// If you ever decide to increase/decrease number of "chunks" be sure to modify
// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
// b) constant fetch from that table (macroAssembler_x86.cpp)
// c) unrolled for loop (macroAssembler_x86.cpp)
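Purely as an illustration of how the three thresholds above might drive chunk selection (the policy sketched here is an assumption, not the code added by this patch):

#include <cstddef>

// Same numeric thresholds as the enum above.
static const size_t CRC32C_HIGH   = 8 * 256;
static const size_t CRC32C_MIDDLE = 8 * 86;
static const size_t CRC32C_LOW    = 8 * 27;

// Pick the largest chunk size for which three full chunks still fit into the
// remaining buffer; 0 means "fall back to the sequential tail loop".
static size_t pick_chunk(size_t remaining) {
  if (remaining >= 3 * CRC32C_HIGH)   return CRC32C_HIGH;
  if (remaining >= 3 * CRC32C_MIDDLE) return CRC32C_MIDDLE;
  if (remaining >= 3 * CRC32C_LOW)    return CRC32C_LOW;
  return 0;
}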

View File

@ -48,8 +48,6 @@ void RegisterMap::check_location_valid() {
}
#endif
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
// Profiling/safepoint support
bool frame::safe_for_sender(JavaThread *thread) {
@ -280,7 +278,7 @@ void frame::patch_pc(Thread* thread, address pc) {
address* pc_addr = &(((address*) sp())[-1]);
if (TracePcPatching) {
tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
pc_addr, *pc_addr, pc);
p2i(pc_addr), p2i(*pc_addr), p2i(pc));
}
// Either the return address is the original one or we are going to
// patch in the same address that's already there.
@ -458,11 +456,11 @@ frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
// This is the sp before any possible extension (adapter/locals).
intptr_t* unextended_sp = interpreter_frame_sender_sp();
#ifdef COMPILER2
#if defined(COMPILER2) || INCLUDE_JVMCI
if (map->update_map()) {
update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
}
#endif // COMPILER2
#endif // COMPILER2 || INCLUDE_JVMCI
return frame(sender_sp, unextended_sp, link(), sender_pc());
}
@ -683,10 +681,19 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
DESCRIBE_FP_OFFSET(interpreter_frame_locals);
DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
#endif
#ifdef AMD64
} else if (is_entry_frame()) {
// This could be more descriptive if we use the enum in
// stubGenerator to map to real names but it's most important to
// claim these frame slots so the error checking works.
for (int i = 0; i < entry_frame_after_call_words; i++) {
values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i));
}
#endif // AMD64
}
}
#endif
}
#endif // !PRODUCT
intptr_t *frame::initial_deoptimization_info() {
// used to reset the saved FP

View File

@ -78,7 +78,11 @@ inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address
assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod");
_deopt_state = is_deoptimized;
} else {
_deopt_state = not_deoptimized;
if (_cb->is_deoptimization_stub()) {
_deopt_state = is_deoptimized;
} else {
_deopt_state = not_deoptimized;
}
}
}

View File

@ -46,7 +46,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs
// the vep is aligned at CodeEntryAlignment whereas c2 only aligns
// the uep and the vep doesn't get real alignment but just slops on by
// only assured that the entry instruction meets the 5 byte size requirement.
#ifdef COMPILER2
#if defined(COMPILER2) || INCLUDE_JVMCI
define_pd_global(intx, CodeEntryAlignment, 32);
#else
define_pd_global(intx, CodeEntryAlignment, 16);
@ -91,6 +91,7 @@ define_pd_global(bool, PreserveFramePointer, false);
\
product(intx, UseAVX, 99, \
"Highest supported AVX instructions set on x86/x64") \
range(0, 99) \
\
product(bool, UseCLMUL, false, \
"Control whether CLMUL instructions can be used on x86/x64") \

View File

@ -40,6 +40,11 @@
// Implementation of InterpreterMacroAssembler
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
jump(RuntimeAddress(entry));
}
#ifndef CC_INTERP
void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
Label update, next, none;
@ -1497,13 +1502,39 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
bind(skip_receiver_profile);
// The method data pointer needs to be updated to reflect the new target.
#if INCLUDE_JVMCI
if (MethodProfileWidth == 0) {
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
}
#else // INCLUDE_JVMCI
update_mdp_by_constant(mdp,
in_bytes(VirtualCallData::
virtual_call_data_size()));
#endif // INCLUDE_JVMCI
bind(profile_continue);
}
}
#if INCLUDE_JVMCI
void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) {
assert_different_registers(method, mdp, reg2);
if (ProfileInterpreter && MethodProfileWidth > 0) {
Label profile_continue;
// If no method data exists, go to profile_continue.
test_method_data_pointer(mdp, profile_continue);
Label done;
record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth,
&VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
bind(done);
update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
bind(profile_continue);
}
}
#endif // INCLUDE_JVMCI
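For orientation, a plain-C++ sketch of the row-visiting logic that record_item_in_profile_helper (below) unrolls into straight-line branches; this is simplified: the emitted code contains no loop, and the virtual-call/JVMCI counter plumbing is reduced to a single optional overflow counter here.

#include <cstdint>

struct Row { intptr_t item; intptr_t count; };  // one (receiver/method, count) pair

static void record_item(Row* rows, int total_rows, intptr_t item,
                        intptr_t* non_profiled_count /* null if there is no overflow cell */) {
  for (int i = 0; i < total_rows; i++) {
    if (rows[i].item == item) { rows[i].count++; return; }  // case 1: found the item
    if (rows[i].item == 0) {                                // case 2: empty row, claim it
      rows[i].item  = item;
      rows[i].count = 1;                                    // DataLayout::counter_increment
      return;
    }
  }
  if (non_profiled_count != nullptr) {
    (*non_profiled_count)++;                                // case 3: polymorphic overflow
  }
}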
// This routine creates a state machine for updating the multi-row
// type profile at a virtual call site (or other type-sensitive bytecode).
// The machine visits each row (of receiver/count) until the receiver type
@ -1523,14 +1554,36 @@ void InterpreterMacroAssembler::record_klass_in_profile_helper(
if (is_virtual_call) {
increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
}
return;
}
#if INCLUDE_JVMCI
else if (EnableJVMCI) {
increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()));
}
#endif // INCLUDE_JVMCI
} else {
int non_profiled_offset = -1;
if (is_virtual_call) {
non_profiled_offset = in_bytes(CounterData::count_offset());
}
#if INCLUDE_JVMCI
else if (EnableJVMCI) {
non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
}
#endif // INCLUDE_JVMCI
int last_row = VirtualCallData::row_limit() - 1;
record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
&VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
}
}
void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
Register reg2, int start_row, Label& done, int total_rows,
OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
int non_profiled_offset) {
int last_row = total_rows - 1;
assert(start_row <= last_row, "must be work left to do");
// Test this row for both the receiver and for null.
// Test this row for both the item and for null.
// Take any of three different outcomes:
// 1. found receiver => increment count and goto done
// 1. found item => increment count and goto done
// 2. found null => keep looking for case 1, maybe allocate this cell
// 3. found something else => keep looking for cases 1 and 2
// Case 3 is handled by a recursive call.
@ -1538,30 +1591,30 @@ void InterpreterMacroAssembler::record_klass_in_profile_helper(
Label next_test;
bool test_for_null_also = (row == start_row);
// See if the receiver is receiver[n].
int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
test_mdp_data_at(mdp, recvr_offset, receiver,
// See if the item is item[n].
int item_offset = in_bytes(item_offset_fn(row));
test_mdp_data_at(mdp, item_offset, item,
(test_for_null_also ? reg2 : noreg),
next_test);
// (Reg2 now contains the receiver from the CallData.)
// (Reg2 now contains the item from the CallData.)
// The receiver is receiver[n]. Increment count[n].
int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
// The item is item[n]. Increment count[n].
int count_offset = in_bytes(item_count_offset_fn(row));
increment_mdp_data_at(mdp, count_offset);
jmp(done);
bind(next_test);
if (test_for_null_also) {
Label found_null;
// Failed the equality check on receiver[n]... Test for null.
// Failed the equality check on item[n]... Test for null.
testptr(reg2, reg2);
if (start_row == last_row) {
// The only thing left to do is handle the null case.
if (is_virtual_call) {
if (non_profiled_offset >= 0) {
jccb(Assembler::zero, found_null);
// Receiver did not match any saved receiver and there is no empty row for it.
// Item did not match any saved item and there is no empty row for it.
// Increment total counter to indicate polymorphic case.
increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
increment_mdp_data_at(mdp, non_profiled_offset);
jmp(done);
bind(found_null);
} else {
@ -1573,21 +1626,22 @@ void InterpreterMacroAssembler::record_klass_in_profile_helper(
jcc(Assembler::zero, found_null);
// Put all the "Case 3" tests here.
record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
item_offset_fn, item_count_offset_fn, non_profiled_offset);
// Found a null. Keep searching for a matching receiver,
// Found a null. Keep searching for a matching item,
// but remember that this is an empty (unused) slot.
bind(found_null);
}
}
// In the fall-through case, we found no matching receiver, but we
// observed the receiver[start_row] is NULL.
// In the fall-through case, we found no matching item, but we
// observed the item[start_row] is NULL.
// Fill in the receiver field and increment the count.
int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
set_mdp_data_at(mdp, recvr_offset, receiver);
int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
// Fill in the item field and increment the count.
int item_offset = in_bytes(item_offset_fn(start_row));
set_mdp_data_at(mdp, item_offset, item);
int count_offset = in_bytes(item_count_offset_fn(start_row));
movl(reg2, DataLayout::counter_increment);
set_mdp_data_at(mdp, count_offset, reg2);
if (start_row > 0) {

View File

@ -32,6 +32,7 @@
// This file specializes the assember with interpreter-specific macros
typedef ByteSize (*OffsetFunction)(uint);
class InterpreterMacroAssembler: public MacroAssembler {
@ -60,6 +61,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
_locals_register(LP64_ONLY(r14) NOT_LP64(rdi)),
_bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {}
void jump_to_entry(address entry);
void load_earlyret_value(TosState state);
#ifdef CC_INTERP
@ -249,6 +252,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
void record_klass_in_profile_helper(Register receiver, Register mdp,
Register reg2, int start_row,
Label& done, bool is_virtual_call);
void record_item_in_profile_helper(Register item, Register mdp,
Register reg2, int start_row, Label& done, int total_rows,
OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
int non_profiled_offset);
void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
@ -262,6 +269,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void profile_virtual_call(Register receiver, Register mdp,
Register scratch2,
bool receiver_can_be_null = false);
void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN;
void profile_ret(Register return_bci, Register mdp);
void profile_null_seen(Register mdp);
void profile_typecheck(Register mdp, Register klass, Register scratch);

View File

@ -31,17 +31,6 @@
#define __ _masm->
// Jump into normal path for accessor and empty entry to jump to normal entry
// The "fast" optimization don't update compilation count therefore can disable inlining
// for these functions that should be inlined.
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry_point = __ pc();
assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
__ jump(RuntimeAddress(Interpreter::entry_for_kind(Interpreter::zerolocals)));
return entry_point;
}
// Abstract method entry
// Attempt to execute abstract method. Throw exception
address InterpreterGenerator::generate_abstract_entry(void) {

View File

@ -36,19 +36,18 @@
address generate_native_entry(bool synchronized);
address generate_abstract_entry(void);
address generate_math_entry(AbstractInterpreter::MethodKind kind);
address generate_jump_to_normal_entry(void);
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_accessor_entry(void) { return NULL; }
address generate_empty_entry(void) { return NULL; }
address generate_Reference_get_entry();
address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind);
#ifndef _LP64
address generate_Float_intBitsToFloat_entry();
address generate_Float_floatToRawIntBits_entry();
address generate_Double_longBitsToDouble_entry();
address generate_Double_doubleToRawLongBits_entry();
#endif
void lock_method(void);
void generate_stack_overflow_check(void);
void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);

View File

@ -151,11 +151,15 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
__ pop_fTOS();
break;
case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0);
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
break;
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
__ addptr(rsp, 2*wordSize);
break;
default :
ShouldNotReachHere();
}

View File

@ -52,8 +52,6 @@
#define __ _masm->
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
#ifdef _WIN64
address AbstractInterpreterGenerator::generate_slow_signature_handler() {
address entry = __ pc();
@ -252,6 +250,9 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
if (kind == Interpreter::java_lang_math_sqrt) {
__ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ fld_d(Address(rsp, wordSize));
switch (kind) {
@ -278,9 +279,6 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
// empty stack slot)
__ pow_with_fallback(0);
break;
case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0);
break;
default :
ShouldNotReachHere();
}

View File

@ -0,0 +1,239 @@
/*
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "compiler/disassembler.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/sharedRuntime.hpp"
#include "jvmci/jvmciEnv.hpp"
#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "asm/register.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/vmreg.hpp"
#include "vmreg_x86.inline.hpp"
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
if (inst->is_call() || inst->is_jump()) {
assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
return (pc_offset + NativeCall::instruction_size);
} else if (inst->is_mov_literal64()) {
// mov+call instruction pair
jint offset = pc_offset + NativeMovConstReg::instruction_size;
u_char* call = (u_char*) (_instructions->start() + offset);
if (call[0] == Assembler::REX_B) {
offset += 1; /* prefix byte for extended register R8-R15 */
call++;
}
assert(call[0] == 0xFF, "expected call");
offset += 2; /* opcode byte + modrm byte */
return (offset);
} else if (inst->is_call_reg()) {
// the inlined vtable stub contains a "call register" instruction
assert(method != NULL, "only valid for virtual calls");
return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
} else if (inst->is_cond_jump()) {
address pc = (address) (inst);
return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
} else {
fatal("unsupported type of instruction for call site");
return 0;
}
}
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
address pc = _instructions->start() + pc_offset;
Handle obj = HotSpotObjectConstantImpl::object(constant);
jobject value = JNIHandles::make_local(obj());
if (HotSpotObjectConstantImpl::compressed(constant)) {
#ifdef _LP64
address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
int oop_index = _oop_recorder->find_index(value);
_instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
#else
fatal("compressed oop on 32bit");
#endif
} else {
address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
*((jobject*) operand) = value;
_instructions->relocate(pc, oop_Relocation::spec_for_immediate(), Assembler::imm_operand);
TRACE_jvmci_3("relocating (oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
}
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
address pc = _instructions->start() + pc_offset;
address operand = Assembler::locate_operand(pc, Assembler::disp32_operand);
address next_instruction = Assembler::locate_next_instruction(pc);
address dest = _constants->start() + data_offset;
long disp = dest - next_instruction;
assert(disp == (jint) disp, "disp doesn't fit in 32 bits");
*((jint*) operand) = (jint) disp;
_instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS), Assembler::disp32_operand);
TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
}
void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
if (cb->is_nmethod()) {
nmethod* nm = (nmethod*) cb;
nativeJump_at((address)inst)->set_jump_destination(nm->verified_entry_point());
} else {
nativeJump_at((address)inst)->set_jump_destination(cb->code_begin());
}
_instructions->relocate((address)inst, runtime_call_Relocation::spec(), Assembler::call32_operand);
}
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
address pc = (address) inst;
if (inst->is_call()) {
// NOTE: for call without a mov, the offset must fit a 32-bit immediate
// see also CompilerToVM.getMaxCallTargetOffset()
NativeCall* call = nativeCall_at(pc);
call->set_destination((address) foreign_call_destination);
_instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
} else if (inst->is_mov_literal64()) {
NativeMovConstReg* mov = nativeMovConstReg_at(pc);
mov->set_data((intptr_t) foreign_call_destination);
_instructions->relocate(mov->instruction_address(), runtime_call_Relocation::spec(), Assembler::imm_operand);
} else if (inst->is_jump()) {
NativeJump* jump = nativeJump_at(pc);
jump->set_jump_destination((address) foreign_call_destination);
_instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
} else if (inst->is_cond_jump()) {
address old_dest = nativeGeneralJump_at(pc)->jump_destination();
address disp = Assembler::locate_operand(pc, Assembler::call32_operand);
*(jint*) disp += ((address) foreign_call_destination) - old_dest;
_instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
} else {
fatal("unsupported relocation for foreign call");
}
TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
}
void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
#ifdef ASSERT
Method* method = NULL;
// we need to check, this might also be an unresolved method
if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
method = getMethodFromHotSpotMethod(hotspot_method);
}
#endif
switch (_next_call_type) {
case INLINE_INVOKE:
break;
case INVOKEVIRTUAL:
case INVOKEINTERFACE: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
_instructions->relocate(call->instruction_address(),
virtual_call_Relocation::spec(_invoke_mark_pc),
Assembler::call32_operand);
break;
}
case INVOKESTATIC: {
assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_static_call_stub());
_instructions->relocate(call->instruction_address(),
relocInfo::static_call_type, Assembler::call32_operand);
break;
}
case INVOKESPECIAL: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
_instructions->relocate(call->instruction_address(),
relocInfo::opt_virtual_call_type, Assembler::call32_operand);
break;
}
default:
break;
}
}
static void relocate_poll_near(address pc) {
NativeInstruction* ni = nativeInstruction_at(pc);
int32_t* disp = (int32_t*) Assembler::locate_operand(pc, Assembler::disp32_operand);
int32_t offset = *disp; // The Java code installed the polling page offset into the disp32 operand
intptr_t new_disp = (intptr_t) (os::get_polling_page() + offset) - (intptr_t) ni;
*disp = (int32_t)new_disp;
}
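A worked example of the displacement rewrite above, with made-up addresses (the real values depend on where the polling page and the compiled code are mapped):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t polling_page = 0x7f0000010000ULL;  // what os::get_polling_page() might return
  int32_t  offset       = 0x40;               // offset the JIT left in the disp32 operand
  uint64_t ni           = 0x7f00000a1230ULL;  // address of the polling instruction
  int64_t  new_disp     = (int64_t)(polling_page + offset) - (int64_t)ni;
  printf("new disp32 = %lld\n", (long long)new_disp);  // -594416, i.e. -0x911f0
  return 0;
}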
void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
switch (mark) {
case POLL_NEAR: {
relocate_poll_near(pc);
_instructions->relocate(pc, relocInfo::poll_type, Assembler::disp32_operand);
break;
}
case POLL_FAR:
// This is a load from a register so there is no relocatable operand.
// We just have to ensure that the format is not disp32_operand
// so that poll_Relocation::fix_relocation_after_move does the right
// thing (i.e. ignores this relocation record)
_instructions->relocate(pc, relocInfo::poll_type, Assembler::imm_operand);
break;
case POLL_RETURN_NEAR: {
relocate_poll_near(pc);
_instructions->relocate(pc, relocInfo::poll_return_type, Assembler::disp32_operand);
break;
}
case POLL_RETURN_FAR:
// see comment above for POLL_FAR
_instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
break;
default:
fatal("invalid mark value");
break;
}
}
// convert JVMCI register indices (as used in oop maps) to HotSpot registers
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
if (jvmci_reg < RegisterImpl::number_of_registers) {
return as_Register(jvmci_reg)->as_VMReg();
} else {
jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) {
return as_XMMRegister(floatRegisterNumber)->as_VMReg();
}
ShouldNotReachHere();
return NULL;
}
}
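For example, with the usual x86-64 numbering (16 general-purpose registers followed by the XMM registers): a JVMCI index of 3 maps to as_Register(3), i.e. rbx, while an index of 18 maps to as_XMMRegister(18 - 16), i.e. xmm2; anything beyond the XMM range hits ShouldNotReachHere().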
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
return !(hotspotRegister->is_FloatRegister() || hotspotRegister->is_XMMRegister());
}

View File

@ -45,6 +45,7 @@
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
#include "crc32c.h"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@ -56,8 +57,6 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
#ifdef ASSERT
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
@ -417,7 +416,7 @@ void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rd
::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}
// Don't assert holding the ttyLock
assert(false, err_msg("DEBUG MESSAGE: %s", msg));
assert(false, "DEBUG MESSAGE: %s", msg);
ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}
@ -883,7 +882,7 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
ttyLocker ttyl;
::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
msg);
assert(false, err_msg("DEBUG MESSAGE: %s", msg));
assert(false, "DEBUG MESSAGE: %s", msg);
}
}
@ -2888,7 +2887,7 @@ void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
}
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) || INCLUDE_JVMCI
void MacroAssembler::empty_FPU_stack() {
if (VM_Version::supports_mmx()) {
emms();
@ -2896,7 +2895,7 @@ void MacroAssembler::empty_FPU_stack() {
for (int i = 8; i-- > 0; ) ffree(i);
}
}
#endif // !LP64 || C1 || !C2
#endif // !LP64 || C1 || !C2 || INCLUDE_JVMCI
// Defines obj, preserves var_size_in_bytes
@ -3032,6 +3031,15 @@ void MacroAssembler::fldcw(AddressLiteral src) {
Assembler::fldcw(as_Address(src));
}
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
if (reachable(src)) {
Assembler::mulpd(dst, as_Address(src));
} else {
lea(rscratch1, src);
Assembler::mulpd(dst, Address(rscratch1, 0));
}
}
void MacroAssembler::pow_exp_core_encoding() {
// kills rax, rcx, rdx
subptr(rsp,sizeof(jdouble));
@ -3104,19 +3112,7 @@ void MacroAssembler::fast_pow() {
BLOCK_COMMENT("} fast_pow");
}
void MacroAssembler::fast_exp() {
// computes exp(X) = 2^(X * log2(e))
// if fast computation is not possible, result is NaN. Requires
// fallback from user of this macro.
// increase precision for intermediate steps of the computation
increase_precision();
fldl2e(); // Stack: log2(e) X ...
fmulp(1); // Stack: (X*log2(e)) ...
pow_exp_core_encoding(); // Stack: exp(X) ...
restore_precision();
}
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
// kills rax, rcx, rdx
// pow and exp needs 2 extra registers on the fpu stack.
Label slow_case, done;
@ -3128,182 +3124,164 @@ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
Register tmp2 = rax;
Register tmp3 = rcx;
if (is_exp) {
// Stack: X
fld_s(0); // duplicate argument for runtime call. Stack: X X
fast_exp(); // Stack: exp(X) X
fcmp(tmp, 0, false, false); // Stack: exp(X) X
// exp(X) not equal to itself: exp(X) is NaN go to slow case.
jcc(Assembler::parity, slow_case);
// get rid of duplicate argument. Stack: exp(X)
if (num_fpu_regs_in_use > 0) {
fxch();
fpop();
} else {
ffree(1);
}
jmp(done);
// Stack: X Y
Label x_negative, y_not_2;
static double two = 2.0;
ExternalAddress two_addr((address)&two);
// constant maybe too far on 64 bit
lea(tmp2, two_addr);
fld_d(Address(tmp2, 0)); // Stack: 2 X Y
fcmp(tmp, 2, true, false); // Stack: X Y
jcc(Assembler::parity, y_not_2);
jcc(Assembler::notEqual, y_not_2);
fxch(); fpop(); // Stack: X
fmul(0); // Stack: X*X
jmp(done);
bind(y_not_2);
fldz(); // Stack: 0 X Y
fcmp(tmp, 1, true, false); // Stack: X Y
jcc(Assembler::above, x_negative);
// X >= 0
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
fld_s(1); // Stack: X Y X Y
fast_pow(); // Stack: X^Y X Y
fcmp(tmp, 0, false, false); // Stack: X^Y X Y
// X^Y not equal to itself: X^Y is NaN go to slow case.
jcc(Assembler::parity, slow_case);
// get rid of duplicate arguments. Stack: X^Y
if (num_fpu_regs_in_use > 0) {
fxch(); fpop();
fxch(); fpop();
} else {
// Stack: X Y
Label x_negative, y_not_2;
ffree(2);
ffree(1);
}
jmp(done);
static double two = 2.0;
ExternalAddress two_addr((address)&two);
// X <= 0
bind(x_negative);
// constant maybe too far on 64 bit
lea(tmp2, two_addr);
fld_d(Address(tmp2, 0)); // Stack: 2 X Y
fcmp(tmp, 2, true, false); // Stack: X Y
jcc(Assembler::parity, y_not_2);
jcc(Assembler::notEqual, y_not_2);
fld_s(1); // Stack: Y X Y
frndint(); // Stack: int(Y) X Y
fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
jcc(Assembler::notEqual, slow_case);
fxch(); fpop(); // Stack: X
fmul(0); // Stack: X*X
subptr(rsp, 8);
jmp(done);
bind(y_not_2);
fldz(); // Stack: 0 X Y
fcmp(tmp, 1, true, false); // Stack: X Y
jcc(Assembler::above, x_negative);
// X >= 0
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
fld_s(1); // Stack: X Y X Y
fast_pow(); // Stack: X^Y X Y
fcmp(tmp, 0, false, false); // Stack: X^Y X Y
// X^Y not equal to itself: X^Y is NaN go to slow case.
jcc(Assembler::parity, slow_case);
// get rid of duplicate arguments. Stack: X^Y
if (num_fpu_regs_in_use > 0) {
fxch(); fpop();
fxch(); fpop();
} else {
ffree(2);
ffree(1);
}
jmp(done);
// X <= 0
bind(x_negative);
fld_s(1); // Stack: Y X Y
frndint(); // Stack: int(Y) X Y
fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
jcc(Assembler::notEqual, slow_case);
subptr(rsp, 8);
// For X^Y, when X < 0, Y has to be an integer and the final
// result depends on whether it's odd or even. We just checked
// that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
// integer to test its parity. If int(Y) is huge and doesn't fit
// in the 64 bit integer range, the integer indefinite value will
// end up in the gp registers. Huge numbers are all even, the
// integer indefinite number is even so it's fine.
#ifdef ASSERT
// Let's check we don't end up with an integer indefinite number
// when not expected. First test for huge numbers: check whether
// int(Y)+1 == int(Y) which is true for very large numbers and
// those are all even. A 64 bit integer is guaranteed to not
// overflow for numbers where y+1 != y (when precision is set to
// double precision).
Label y_not_huge;
fld1(); // Stack: 1 int(Y) X Y
fadd(1); // Stack: 1+int(Y) int(Y) X Y
#ifdef _LP64
// trip to memory to force the precision down from double extended
// precision
fstp_d(Address(rsp, 0));
fld_d(Address(rsp, 0));
#endif
fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
#endif
// move int(Y) as 64 bit integer to thread's stack
fistp_d(Address(rsp,0)); // Stack: X Y
#ifdef ASSERT
jcc(Assembler::notEqual, y_not_huge);
// Y is huge so we know it's even. It may not fit in a 64 bit
// integer and we don't want the debug code below to see the
// integer indefinite value so overwrite int(Y) on the thread's
// stack with 0.
movl(Address(rsp, 0), 0);
movl(Address(rsp, 4), 0);
bind(y_not_huge);
#endif
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
fld_s(1); // Stack: X Y X Y
fabs(); // Stack: abs(X) Y X Y
fast_pow(); // Stack: abs(X)^Y X Y
fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
// abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
pop(tmp2);
NOT_LP64(pop(tmp3));
jcc(Assembler::parity, slow_case);
#ifdef ASSERT
// Check that int(Y) is not integer indefinite value (int
// overflow). Shouldn't happen because for values that would
// overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
{
Label integer;
testl(tmp2, tmp2);
jcc(Assembler::notZero, integer);
cmpl(tmp3, 0x80000000);
jcc(Assembler::notZero, integer);
STOP("integer indefinite value shouldn't be seen here");
bind(integer);
}
#else
{
Label integer;
mov(tmp3, tmp2); // preserve tmp2 for parity check below
shlq(tmp3, 1);
jcc(Assembler::carryClear, integer);
jcc(Assembler::notZero, integer);
STOP("integer indefinite value shouldn't be seen here");
bind(integer);
}
#endif
#endif
// get rid of duplicate arguments. Stack: X^Y
if (num_fpu_regs_in_use > 0) {
fxch(); fpop();
fxch(); fpop();
} else {
ffree(2);
ffree(1);
}
testl(tmp2, 1);
jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
// X <= 0, Y even: X^Y = -abs(X)^Y
fchs(); // Stack: -abs(X)^Y Y
jmp(done);
// slow case: runtime call
bind(slow_case);
fpop(); // pop incorrect result or int(Y)
fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
is_exp ? 1 : 2, num_fpu_regs_in_use);
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
// Come here with result in F-TOS
bind(done);
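For context on the x_negative path above (an illustrative sketch, not part of the emitted code): when X < 0, X^Y is real-valued only for integral Y, and the sign of the result follows the parity of int(Y), which is why int(Y) is moved to the GP registers and its low bit is tested. A minimal C++ illustration of that rule using std::pow:
#include <cmath>
#include <cstdio>
int main() {
  std::printf("%f\n", std::pow(-2.0, 3.0)); // -8: odd integral exponent keeps the sign
  std::printf("%f\n", std::pow(-2.0, 4.0)); // 16: even integral exponent gives abs(X)^Y
  std::printf("%f\n", std::pow(-2.0, 0.5)); // nan: non-integral exponent, the slow/undefined case
  return 0;
}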
@ -6267,7 +6245,9 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
// Save caller's stack pointer into RBP if the frame pointer is preserved.
if (PreserveFramePointer) {
movptr(rbp, rsp);
addptr(rbp, framesize + wordSize);
if (framesize > 0) {
addptr(rbp, framesize);
}
}
}
@ -8636,6 +8616,471 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
notl(crc); // ~c
}
#ifdef _LP64
// S. Gueron / Information Processing Letters 112 (2012) 184
// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
// Output: the 64-bit carry-less product of B * CONST
void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
Register tmp1, Register tmp2, Register tmp3) {
lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
if (n > 0) {
addq(tmp3, n * 256 * 8);
}
// Q1 = TABLEExt[n][B & 0xFF];
movl(tmp1, in);
andl(tmp1, 0x000000FF);
shll(tmp1, 3);
addq(tmp1, tmp3);
movq(tmp1, Address(tmp1, 0));
// Q2 = TABLEExt[n][B >> 8 & 0xFF];
movl(tmp2, in);
shrl(tmp2, 8);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addq(tmp2, tmp3);
movq(tmp2, Address(tmp2, 0));
shlq(tmp2, 8);
xorq(tmp1, tmp2);
// Q3 = TABLEExt[n][B >> 16 & 0xFF];
movl(tmp2, in);
shrl(tmp2, 16);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addq(tmp2, tmp3);
movq(tmp2, Address(tmp2, 0));
shlq(tmp2, 16);
xorq(tmp1, tmp2);
// Q4 = TABLEExt[n][B >> 24 & 0xFF];
shrl(in, 24);
andl(in, 0x000000FF);
shll(in, 3);
addq(in, tmp3);
movq(in, Address(in, 0));
shlq(in, 24);
xorq(in, tmp1);
// return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
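A scalar C++ sketch of what crc32c_ipl_alg4 computes (the table argument is a hypothetical stand-in for one 256-entry sub-table of the stub-generated table at StubRoutines::crc32c_table_addr()):
#include <cstdint>
// Gueron, Algorithm 4: carry-less multiply of a 32-bit value by a constant,
// one table lookup per input byte, recombined as Q1 ^ Q2<<8 ^ Q3<<16 ^ Q4<<24.
uint64_t clmul_by_table(uint32_t b, const uint64_t table[256]) {
  uint64_t q1 = table[ b        & 0xFF];
  uint64_t q2 = table[(b >> 8)  & 0xFF];
  uint64_t q3 = table[(b >> 16) & 0xFF];
  uint64_t q4 = table[(b >> 24) & 0xFF];
  return q1 ^ (q2 << 8) ^ (q3 << 16) ^ (q4 << 24);
}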
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
Register in_out,
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
XMMRegister w_xtmp2,
Register tmp1,
Register n_tmp2, Register n_tmp3) {
if (is_pclmulqdq_supported) {
movdl(w_xtmp1, in_out); // modified blindly
movl(tmp1, const_or_pre_comp_const_index);
movdl(w_xtmp2, tmp1);
pclmulqdq(w_xtmp1, w_xtmp2, 0);
movdq(in_out, w_xtmp1);
} else {
crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
}
}
// Recombination Alternative 2: No bit-reflections
// T1 = (CRC_A * U1) << 1
// T2 = (CRC_B * U2) << 1
// C1 = T1 >> 32
// C2 = T2 >> 32
// T1 = T1 & 0xFFFFFFFF
// T2 = T2 & 0xFFFFFFFF
// T1 = CRC32(0, T1)
// T2 = CRC32(0, T2)
// C1 = C1 ^ T1
// C2 = C2 ^ T2
// CRC = C1 ^ C2 ^ CRC_C
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp1, Register tmp2,
Register n_tmp3) {
crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
shlq(in_out, 1);
movl(tmp1, in_out);
shrq(in_out, 32);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
shlq(in1, 1);
movl(tmp1, in1);
shrq(in1, 32);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in1, tmp2);
xorl(in_out, in1);
xorl(in_out, in2);
}
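A scalar sketch of the recombination above, using the SSE4.2/PCLMULQDQ intrinsics as stand-ins for the instructions the macro emits (sketch only; register allocation and the table-based fallback are omitted):
#include <cstdint>
#include <nmmintrin.h>   // _mm_crc32_u32
#include <wmmintrin.h>   // _mm_clmulepi64_si128
static inline uint64_t carryless_mul(uint32_t a, uint32_t b) {
  __m128i va = _mm_cvtsi32_si128((int)a);
  __m128i vb = _mm_cvtsi32_si128((int)b);
  return (uint64_t)_mm_cvtsi128_si64(_mm_clmulepi64_si128(va, vb, 0));
}
// Recombination Alternative 2 (no bit-reflections), following the pseudo-code above.
uint32_t crc32c_recombine(uint32_t crc_a, uint32_t crc_b, uint32_t crc_c,
                          uint32_t u1, uint32_t u2) {
  uint64_t t1 = carryless_mul(crc_a, u1) << 1;
  uint64_t t2 = carryless_mul(crc_b, u2) << 1;
  uint32_t c1 = (uint32_t)(t1 >> 32) ^ _mm_crc32_u32(0, (uint32_t)t1);
  uint32_t c2 = (uint32_t)(t2 >> 32) ^ _mm_crc32_u32(0, (uint32_t)t2);
  return c1 ^ c2 ^ crc_c;
}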
// Set N to predefined value
// Subtract from the length of a buffer
// execute in a loop:
// CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
// for i = 1 to N do
// CRC_A = CRC32(CRC_A, A[i])
// CRC_B = CRC32(CRC_B, B[i])
// CRC_C = CRC32(CRC_C, C[i])
// end for
// Recombine
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
Register in_out1, Register in_out2, Register in_out3,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp4, Register tmp5,
Register n_tmp6) {
Label L_processPartitions;
Label L_processPartition;
Label L_exit;
bind(L_processPartitions);
cmpl(in_out1, 3 * size);
jcc(Assembler::less, L_exit);
xorl(tmp1, tmp1);
xorl(tmp2, tmp2);
movq(tmp3, in_out2);
addq(tmp3, size);
bind(L_processPartition);
crc32(in_out3, Address(in_out2, 0), 8);
crc32(tmp1, Address(in_out2, size), 8);
crc32(tmp2, Address(in_out2, size * 2), 8);
addq(in_out2, 8);
cmpq(in_out2, tmp3);
jcc(Assembler::less, L_processPartition);
crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
n_tmp6);
addq(in_out2, 2 * size);
subl(in_out1, 3 * size);
jmp(L_processPartitions);
bind(L_exit);
}
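A scalar sketch of the three-stream loop described in the comment before crc32c_proc_chunk (names are illustrative; _mm_crc32_u64 stands in for the emitted CRC32 instruction, size is assumed to be a multiple of 8, and the final recombination step is omitted):
#include <cstdint>
#include <cstring>
#include <nmmintrin.h>   // _mm_crc32_u64
// Process one partition of 3*size bytes as interleaved streams A|B|C,
// eight bytes per stream per iteration; the three CRCs are recombined afterwards.
void crc32c_chunk_sketch(const uint8_t* p, size_t size,
                         uint64_t& crc_a, uint64_t& crc_b, uint64_t& crc_c) {
  for (size_t off = 0; off < size; off += 8) {
    uint64_t a, b, c;
    std::memcpy(&a, p + off,            8);
    std::memcpy(&b, p + size + off,     8);
    std::memcpy(&c, p + 2 * size + off, 8);
    crc_a = _mm_crc32_u64(crc_a, a);
    crc_b = _mm_crc32_u64(crc_b, b);
    crc_c = _mm_crc32_u64(crc_c, c);
  }
}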
#else
void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister xtmp1, XMMRegister xtmp2) {
lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
if (n > 0) {
addl(tmp3, n * 256 * 8);
}
// Q1 = TABLEExt[n][B & 0xFF];
movl(tmp1, in_out);
andl(tmp1, 0x000000FF);
shll(tmp1, 3);
addl(tmp1, tmp3);
movq(xtmp1, Address(tmp1, 0));
// Q2 = TABLEExt[n][B >> 8 & 0xFF];
movl(tmp2, in_out);
shrl(tmp2, 8);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addl(tmp2, tmp3);
movq(xtmp2, Address(tmp2, 0));
psllq(xtmp2, 8);
pxor(xtmp1, xtmp2);
// Q3 = TABLEExt[n][B >> 16 & 0xFF];
movl(tmp2, in_out);
shrl(tmp2, 16);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addl(tmp2, tmp3);
movq(xtmp2, Address(tmp2, 0));
psllq(xtmp2, 16);
pxor(xtmp1, xtmp2);
// Q4 = TABLEExt[n][B >> 24 & 0xFF];
shrl(in_out, 24);
andl(in_out, 0x000000FF);
shll(in_out, 3);
addl(in_out, tmp3);
movq(xtmp2, Address(in_out, 0));
psllq(xtmp2, 24);
pxor(xtmp1, xtmp2); // Result in CXMM
// return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
Register in_out,
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
XMMRegister w_xtmp2,
Register tmp1,
Register n_tmp2, Register n_tmp3) {
if (is_pclmulqdq_supported) {
movdl(w_xtmp1, in_out);
movl(tmp1, const_or_pre_comp_const_index);
movdl(w_xtmp2, tmp1);
pclmulqdq(w_xtmp1, w_xtmp2, 0);
// Keep result in XMM since GPR is 32 bit in length
} else {
crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
}
}
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp1, Register tmp2,
Register n_tmp3) {
crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
psllq(w_xtmp1, 1);
movdl(tmp1, w_xtmp1);
psrlq(w_xtmp1, 32);
movdl(in_out, w_xtmp1);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in_out, tmp2);
psllq(w_xtmp2, 1);
movdl(tmp1, w_xtmp2);
psrlq(w_xtmp2, 32);
movdl(in1, w_xtmp2);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in1, tmp2);
xorl(in_out, in1);
xorl(in_out, in2);
}
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
Register in_out1, Register in_out2, Register in_out3,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp4, Register tmp5,
Register n_tmp6) {
Label L_processPartitions;
Label L_processPartition;
Label L_exit;
bind(L_processPartitions);
cmpl(in_out1, 3 * size);
jcc(Assembler::less, L_exit);
xorl(tmp1, tmp1);
xorl(tmp2, tmp2);
movl(tmp3, in_out2);
addl(tmp3, size);
bind(L_processPartition);
crc32(in_out3, Address(in_out2, 0), 4);
crc32(tmp1, Address(in_out2, size), 4);
crc32(tmp2, Address(in_out2, size*2), 4);
crc32(in_out3, Address(in_out2, 0+4), 4);
crc32(tmp1, Address(in_out2, size+4), 4);
crc32(tmp2, Address(in_out2, size*2+4), 4);
addl(in_out2, 8);
cmpl(in_out2, tmp3);
jcc(Assembler::less, L_processPartition);
push(tmp3);
push(in_out1);
push(in_out2);
tmp4 = tmp3;
tmp5 = in_out1;
n_tmp6 = in_out2;
crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
n_tmp6);
pop(in_out2);
pop(in_out1);
pop(tmp3);
addl(in_out2, 2 * size);
subl(in_out1, 3 * size);
jmp(L_processPartitions);
bind(L_exit);
}
#endif //LP64
#ifdef _LP64
// Algorithm 2: Pipelined usage of the CRC32 instruction.
// Input: A buffer I of L bytes.
// Output: the CRC32C value of the buffer.
// Notations:
// Write L = 24N + r, with N = floor (L/24).
// r = L mod 24 (0 <= r < 24).
// Consider I as the concatenation of A|B|C|R, where A, B, C each consist of
// N quadwords, and R consists of r bytes.
// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
bool is_pclmulqdq_supported) {
uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
Label L_wordByWord;
Label L_byteByByteProlog;
Label L_byteByByte;
Label L_exit;
if (is_pclmulqdq_supported ) {
const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
} else {
const_or_pre_comp_const_index[0] = 1;
const_or_pre_comp_const_index[1] = 0;
const_or_pre_comp_const_index[2] = 3;
const_or_pre_comp_const_index[3] = 2;
const_or_pre_comp_const_index[4] = 5;
const_or_pre_comp_const_index[5] = 4;
}
crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
movl(tmp1, in2);
andl(tmp1, 0x00000007);
negl(tmp1);
addl(tmp1, in2);
addq(tmp1, in1);
BIND(L_wordByWord);
cmpq(in1, tmp1);
jcc(Assembler::greaterEqual, L_byteByByteProlog);
crc32(in_out, Address(in1, 0), 4);
addq(in1, 4);
jmp(L_wordByWord);
BIND(L_byteByByteProlog);
andl(in2, 0x00000007);
movl(tmp2, 1);
BIND(L_byteByByte);
cmpl(tmp2, in2);
jccb(Assembler::greater, L_exit);
crc32(in_out, Address(in1, 0), 1);
incq(in1);
incl(tmp2);
jmp(L_byteByByte);
BIND(L_exit);
}
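As a worked example of the L = 24N + r split in the comment above (the length is chosen arbitrarily): with L = 100, N = floor(100/24) = 4 and r = 4, so A, B and C are 4 quadwords (32 bytes) each and the 4-byte tail R is consumed by the word-by-word/byte-by-byte epilog. The same arithmetic as a tiny snippet:
#include <cstdio>
int main() {
  unsigned L = 100;     // arbitrary buffer length
  unsigned N = L / 24;  // quadwords per stream: 4
  unsigned r = L % 24;  // tail bytes: 4
  std::printf("A/B/C: %u quadwords (%u bytes) each, tail R: %u bytes\n", N, 8 * N, r);
  return 0;
}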
#else
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
bool is_pclmulqdq_supported) {
uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
Label L_wordByWord;
Label L_byteByByteProlog;
Label L_byteByByte;
Label L_exit;
if (is_pclmulqdq_supported) {
const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
} else {
const_or_pre_comp_const_index[0] = 1;
const_or_pre_comp_const_index[1] = 0;
const_or_pre_comp_const_index[2] = 3;
const_or_pre_comp_const_index[3] = 2;
const_or_pre_comp_const_index[4] = 5;
const_or_pre_comp_const_index[5] = 4;
}
crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
movl(tmp1, in2);
andl(tmp1, 0x00000007);
negl(tmp1);
addl(tmp1, in2);
addl(tmp1, in1);
BIND(L_wordByWord);
cmpl(in1, tmp1);
jcc(Assembler::greaterEqual, L_byteByByteProlog);
crc32(in_out, Address(in1,0), 4);
addl(in1, 4);
jmp(L_wordByWord);
BIND(L_byteByByteProlog);
andl(in2, 0x00000007);
movl(tmp2, 1);
BIND(L_byteByByte);
cmpl(tmp2, in2);
jccb(Assembler::greater, L_exit);
movb(tmp1, Address(in1, 0));
crc32(in_out, tmp1, 1);
incl(in1);
incl(tmp2);
jmp(L_byteByByte);
BIND(L_exit);
}
#endif // LP64
#undef BIND
#undef BLOCK_COMMENT
