Merge

2026-02-20 07:15:31 +00:00 · 2014-04-01 17:27:19 -07:00 · 2014-04-01 17:27:19 -07:00 · 4452e6316f
commit 4452e6316f
parent e482a97181 958938e6a4
240 changed files with 10936 additions and 2375 deletions
--- a/hotspot/agent/src/os/bsd/MacosxDebuggerLocal.m
+++ b/hotspot/agent/src/os/bsd/MacosxDebuggerLocal.m
@ -95,7 +95,9 @@ static task_t getTask(JNIEnv *env, jobject this_obj) {
 #define CHECK_EXCEPTION_CLEAR_(value) if ((*env)->ExceptionOccurred(env)) { (*env)->ExceptionClear(env); return value; } 

 static void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
-  (*env)->ThrowNew(env, (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException"), errMsg);
+  jclass exceptionClass = (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException");
+  CHECK_EXCEPTION;
+  (*env)->ThrowNew(env, exceptionClass, errMsg);
 }

 static struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
@ -129,6 +131,7 @@ static struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
 JNIEXPORT void JNICALL 
 Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_init0(JNIEnv *env, jclass cls) {
  symbolicatorID = (*env)->GetFieldID(env, cls, "symbolicator", "J");
+  CHECK_EXCEPTION;
  taskID = (*env)->GetFieldID(env, cls, "task", "J");
  CHECK_EXCEPTION;

@ -236,13 +239,16 @@ JNIEXPORT jobject JNICALL Java_sun_jvm_hotspot_debugger_bsd_BsdDebuggerLocal_loo
  (JNIEnv *env, jobject this_obj, jlong addr) {
  uintptr_t offset;
  const char* sym = NULL;
+  jstring sym_string;

  struct ps_prochandle* ph = get_proc_handle(env, this_obj);
  if (ph != NULL && ph->core != NULL) {
    sym = symbol_for_pc(ph, (uintptr_t) addr, &offset);
    if (sym == NULL) return 0;
+    sym_string = (*env)->NewStringUTF(env, sym);
+    CHECK_EXCEPTION_(0);
    return (*env)->CallObjectMethod(env, this_obj, createClosestSymbol_ID,
-                          (*env)->NewStringUTF(env, sym), (jlong)offset);
+                                                sym_string, (jlong)offset);
  }
  return 0;
 }
@ -749,11 +755,14 @@ static void fillLoadObjects(JNIEnv* env, jobject this_obj, struct ps_prochandle*
     const char* name;
     jobject loadObject;
     jobject loadObjectList;
+     jstring nameString;

     base = get_lib_base(ph, i);
     name = get_lib_name(ph, i);
+     nameString = (*env)->NewStringUTF(env, name);
+     CHECK_EXCEPTION;
     loadObject = (*env)->CallObjectMethod(env, this_obj, createLoadObject_ID,
-                                   (*env)->NewStringUTF(env, name), (jlong)0, (jlong)base);
+                                            nameString, (jlong)0, (jlong)base);
     CHECK_EXCEPTION;
     loadObjectList = (*env)->GetObjectField(env, this_obj, loadObjectList_ID);
     CHECK_EXCEPTION;
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java
@ -51,9 +51,9 @@ public class G1CollectedHeap extends SharedHeap {
    static private CIntegerField summaryBytesUsedField;
    // G1MonitoringSupport* _g1mm;
    static private AddressField g1mmField;
-    // MasterOldRegionSet _old_set;
+    // HeapRegionSet _old_set;
    static private long oldSetFieldOffset;
-    // MasterHumongousRegionSet _humongous_set;
+    // HeapRegionSet _humongous_set;
    static private long humongousSetFieldOffset;

    static {
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java
@ -40,12 +40,8 @@ import sun.jvm.hotspot.types.TypeDataBase;
 // Mirror class for HeapRegionSetBase. Represents a group of regions.

 public class HeapRegionSetBase extends VMObject {
-    // uint _length;
-    static private CIntegerField lengthField;
-    // uint _region_num;
-    static private CIntegerField regionNumField;
-    // size_t _total_used_bytes;
-    static private CIntegerField totalUsedBytesField;
+
+    static private long countField;

    static {
        VM.registerVMInitializedObserver(new Observer() {
@ -58,21 +54,13 @@ public class HeapRegionSetBase extends VMObject {
    static private synchronized void initialize(TypeDataBase db) {
        Type type = db.lookupType("HeapRegionSetBase");

-        lengthField         = type.getCIntegerField("_length");
-        regionNumField      = type.getCIntegerField("_region_num");
-        totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
+        countField = type.getField("_count").getOffset();
    }

-    public long length() {
-        return lengthField.getValue(addr);
-    }

-    public long regionNum() {
-        return regionNumField.getValue(addr);
-    }
-
-    public long totalUsedBytes() {
-        return totalUsedBytesField.getValue(addr);
+    public HeapRegionSetCount count() {
+        Address countFieldAddr = addr.addOffsetTo(countField);
+        return (HeapRegionSetCount) VMObjectFactory.newObject(HeapRegionSetCount.class, countFieldAddr);
    }

    public HeapRegionSetBase(Address addr) {
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java
@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc_implementation.g1;
+
+import java.util.Iterator;
+import java.util.Observable;
+import java.util.Observer;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
+import sun.jvm.hotspot.types.AddressField;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for HeapRegionSetCount. Holds the _length and _capacity counters of a region set.
+
+public class HeapRegionSetCount extends VMObject {
+
+    static private CIntegerField lengthField;   // accessor for the VM field "_length"
+    static private CIntegerField capacityField; // accessor for the VM field "_capacity"
+
+    static { // defer the field lookup until the VM-initialized observer is notified
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+            });
+    }
+
+    static private synchronized void initialize(TypeDataBase db) { // resolves both field accessors from the type database
+        Type type = db.lookupType("HeapRegionSetCount"); // VM-side type that this class mirrors
+
+        lengthField   = type.getCIntegerField("_length");
+        capacityField = type.getCIntegerField("_capacity");
+    }
+
+    public long length() { // returns the value of _length read at this object's address
+        return lengthField.getValue(addr);
+    }
+
+    public long capacity() { // returns the value of _capacity read at this object's address
+        return capacityField.getValue(addr);
+    }
+
+    public HeapRegionSetCount(Address addr) { // addr is handed unchanged to the VMObject constructor
+        super(addr);
+    }
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java
@ -114,7 +114,8 @@ public class HeapSummary extends Tool {
             long survivorRegionNum = g1mm.survivorRegionNum();
             HeapRegionSetBase oldSet = g1h.oldSet();
             HeapRegionSetBase humongousSet = g1h.humongousSet();
-             long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
+             long oldRegionNum = oldSet.count().length()
+                          + humongousSet.count().capacity() / HeapRegion.grainBytes();
             printG1Space("G1 Heap:", g1h.n_regions(),
                          g1h.used(), g1h.capacity());
             System.out.println("G1 Young Generation:");
--- a/hotspot/make/Makefile
+++ b/hotspot/make/Makefile
@ -287,8 +287,43 @@ else
 	@$(ECHO) "Error: trying to build a minimal target but JVM_VARIANT_MINIMAL1 is not true."
 endif

+remove_old_debuginfo:
+ifeq ($(JVM_VARIANT_CLIENT), true)
+  ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifeq ($(OSNAME), windows)
+	  $(RM) -f $(EXPORT_CLIENT_DIR)/jvm.map $(EXPORT_CLIENT_DIR)/jvm.pdb
+    else
+	  $(RM) -f $(EXPORT_CLIENT_DIR)/libjvm.debuginfo
+    endif
+  else
+	$(RM) -f $(EXPORT_CLIENT_DIR)/libjvm.diz
+  endif
+endif
+ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
+  ifeq ($(ZIP_DEBUGINFO_FILES),1)
+    ifeq ($(OSNAME), windows)
+	  $(RM) -f $(EXPORT_SERVER_DIR)/jvm.map $(EXPORT_SERVER_DIR)/jvm.pdb
+    else
+      ifeq ($(OS_VENDOR), Darwin)
+	    $(RM) -rf $(EXPORT_SERVER_DIR)/libjvm.dylib.dSYM
+      else
+	    $(RM) -f $(EXPORT_SERVER_DIR)/libjvm.debuginfo
+      endif
+    endif
+  else
+	$(RM) -f $(EXPORT_SERVER_DIR)/libjvm.diz
+  endif
+endif
+ifeq ($(JVM_VARIANT_MINIMAL1),true)
+  ifeq ($(ZIP_DEBUGINFO_FILES),1)
+	$(RM) -f $(EXPORT_MINIMAL_DIR)/libjvm.debuginfo
+  else
+	$(RM) -f $(EXPORT_MINIMAL_DIR)/libjvm.diz
+  endif
+endif
+
 # Export file rule
-generic_export: $(EXPORT_LIST)
+generic_export: $(EXPORT_LIST) remove_old_debuginfo

 export_product:
 	$(MAKE) BUILD_FLAVOR=$(@:export_%=%) generic_export
@ -841,4 +876,4 @@ include $(GAMMADIR)/make/jprt.gmk
 	export_jdk_product export_jdk_fastdebug export_jdk_debug \
 	create_jdk copy_jdk update_jdk test_jdk \
 	copy_product_jdk copy_fastdebug_jdk copy_debug_jdk  \
-	$(HS_ALT_MAKE)/Makefile.make
+	$(HS_ALT_MAKE)/Makefile.make remove_old_debuginfo
--- a/hotspot/make/aix/makefiles/vm.make
+++ b/hotspot/make/aix/makefiles/vm.make
@ -101,7 +101,7 @@ CXXFLAGS =           \
 # This is VERY important! The version define must only be supplied to vm_version.o
 # If not, ccache will not re-use the cache at all, since the version string might contain
 # a time and date.
-vm_version.o: CXXFLAGS += ${JRE_VERSION}
+CXXFLAGS/vm_version.o += ${JRE_VERSION}

 CXXFLAGS/BYFILE = $(CXXFLAGS/$@)

--- a/hotspot/make/excludeSrc.make
+++ b/hotspot/make/excludeSrc.make
@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@ -87,9 +87,10 @@ ifeq ($(INCLUDE_ALL_GCS), false)
 	g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
 	g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
 	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \
-	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
+	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1StringDedup.cpp g1StringDedupStat.cpp \
+	g1StringDedupTable.cpp g1StringDedupThread.cpp g1StringDedupQueue.cpp g1_globals.cpp heapRegion.cpp \
 	g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
-	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \
+	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp g1CodeCacheRemSet.cpp \
 	adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \
 	cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \
 	parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \
--- a/hotspot/make/jprt.properties
+++ b/hotspot/make/jprt.properties
@ -33,7 +33,7 @@ jprt.need.sibling.build=false

 # This tells jprt what default release we want to build

-jprt.hotspot.default.release=jdk8
+jprt.hotspot.default.release=jdk9

 jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}

@ -47,72 +47,50 @@ jprt.sync.push=false
 #       sparc etc.

 # Define the Solaris platforms we want for the various releases
-jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7}
+jprt.my.solaris.sparcv9.jdk9=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}

-jprt.my.solaris.x64.jdk8=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7}
+jprt.my.solaris.x64.jdk9=solaris_x64_5.10
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}

-jprt.my.linux.i586.jdk8=linux_i586_2.6
-jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7}
+jprt.my.linux.i586.jdk9=linux_i586_2.6
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}

-jprt.my.linux.x64.jdk8=linux_x64_2.6
-jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7}
+jprt.my.linux.x64.jdk9=linux_x64_2.6
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}

-jprt.my.linux.ppc.jdk8=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7}
+jprt.my.linux.ppc.jdk9=linux_ppc_2.6
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}

-jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7}
+jprt.my.linux.ppcv2.jdk9=linux_ppcv2_2.6
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}

-jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7}
+jprt.my.linux.ppcsflt.jdk9=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}

-jprt.my.linux.armvfpsflt.jdk8=linux_armvfpsflt_2.6
+jprt.my.linux.armvfpsflt.jdk9=linux_armvfpsflt_2.6
 jprt.my.linux.armvfpsflt=${jprt.my.linux.armvfpsflt.${jprt.tools.default.release}}

-jprt.my.linux.armvfphflt.jdk8=linux_armvfphflt_2.6
+jprt.my.linux.armvfphflt.jdk9=linux_armvfphflt_2.6
 jprt.my.linux.armvfphflt=${jprt.my.linux.armvfphflt.${jprt.tools.default.release}}

 # The ARM GP vfp-sflt build is not currently supported
-#jprt.my.linux.armvs.jdk8=linux_armvs_2.6
+#jprt.my.linux.armvs.jdk9=linux_armvs_2.6
 #jprt.my.linux.armvs=${jprt.my.linux.armvs.${jprt.tools.default.release}}

-jprt.my.linux.armvh.jdk8=linux_armvh_2.6
+jprt.my.linux.armvh.jdk9=linux_armvh_2.6
 jprt.my.linux.armvh=${jprt.my.linux.armvh.${jprt.tools.default.release}}

-jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7}
+jprt.my.linux.armsflt.jdk9=linux_armsflt_2.6
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}

-jprt.my.macosx.x64.jdk8=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7}
+jprt.my.macosx.x64.jdk9=macosx_x64_10.7
 jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}

-jprt.my.windows.i586.jdk8=windows_i586_6.1
-jprt.my.windows.i586.jdk7=windows_i586_6.1
-jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7}
+jprt.my.windows.i586.jdk9=windows_i586_6.1
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}

-jprt.my.windows.x64.jdk8=windows_x64_6.1
-jprt.my.windows.x64.jdk7=windows_x64_6.1
-jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7}
+jprt.my.windows.x64.jdk9=windows_x64_6.1
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}

 # Standard list of jprt build targets for this source tree
@ -143,9 +121,7 @@ jprt.build.targets.embedded= \
 jprt.build.targets.all=${jprt.build.targets.standard}, \
    ${jprt.build.targets.embedded}, ${jprt.build.targets.open}

-jprt.build.targets.jdk8=${jprt.build.targets.all}
-jprt.build.targets.jdk7=${jprt.build.targets.all}
-jprt.build.targets.jdk7u8=${jprt.build.targets.all}
+jprt.build.targets.jdk9=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}

 # Subset lists of test targets for this source tree
@ -349,9 +325,7 @@ jprt.test.targets.embedded= 		\
  ${jprt.my.windows.i586.test.targets}, \
  ${jprt.my.windows.x64.test.targets}

-jprt.test.targets.jdk8=${jprt.test.targets.standard}
-jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7}
+jprt.test.targets.jdk9=${jprt.test.targets.standard}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}

 # The default test/Makefile targets that should be run
@ -399,9 +373,7 @@ jprt.make.rule.test.targets.standard = \
 jprt.make.rule.test.targets.embedded = \
  ${jprt.make.rule.test.targets.standard.client}

-jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7}
+jprt.make.rule.test.targets.jdk9=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}

 # 7155453: Work-around to prevent popups on OSX from blocking test completion
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,7 +24,6 @@
 */

 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "interpreter/interpreter.hpp"
@ -37,6 +36,7 @@
 #include "runtime/os.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
@ -384,10 +384,10 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
      bool load_xa = (xa != 0) || (xb < 0);
      bool return_xd = false;

-      if (load_xa) lis(tmp, xa);
-      if (xc) lis(d, xc);
+      if (load_xa) { lis(tmp, xa); }
+      if (xc) { lis(d, xc); }
      if (load_xa) {
-        if (xb) ori(tmp, tmp, xb); // No addi, we support tmp == R0.
+        if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
      } else {
        li(tmp, xb); // non-negative
      }
@ -409,18 +409,18 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur
    // opt 4: avoid adding 0
    if (xa) { // Highest 16-bit needed?
      lis(d, xa);
-      if (xb) addi(d, d, xb);
+      if (xb) { addi(d, d, xb); }
    } else {
      li(d, xb);
    }
    sldi(d, d, 32);
-    if (xc) addis(d, d, xc);
+    if (xc) { addis(d, d, xc); }
  }

  // opt 5: Return offset to be inserted into following instruction.
  if (return_simm16_rest) return xd;

-  if (xd) addi(d, d, xd);
+  if (xd) { addi(d, d, xd); }
  return 0;
 }

@ -696,4 +696,5 @@ void Assembler::test_asm() {
  tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", code()->insts_begin(), code()->insts_end());
  code()->decode();
 }
+
 #endif // !PRODUCT
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -139,7 +139,8 @@ inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16)   { A
 inline void Assembler::cmplw( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); }
 inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); }

-inline void Assembler::isel(Register d, Register a, Register b, int c) { emit_int32(ISEL_OPCODE    | rt(d)  | ra(a) | rb(b) | bc(c)); }
+inline void Assembler::isel(Register d, Register a, Register b, int c) { guarantee(VM_Version::has_isel(), "opcode not supported on this hardware");
+                                                                         emit_int32(ISEL_OPCODE    | rt(d)  | ra(a) | rb(b) | bc(c)); }

 // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
 inline void Assembler::andi_(   Register a, Register s, int ui16)      { emit_int32(ANDI_OPCODE    | rta(a) | rs(s) | uimm(ui16, 16)); }
@ -531,9 +532,12 @@ inline void Assembler::fmr_(FloatRegister d, FloatRegister b) { emit_int32( FMR_
 //inline void Assembler::mffgpr( FloatRegister d, Register b)   { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); }
 //inline void Assembler::mftgpr( Register d, FloatRegister b)   { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); }
 // add cmpb and popcntb to detect ppc power version.
-inline void Assembler::cmpb(   Register a, Register s, Register b) { emit_int32( CMPB_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
-inline void Assembler::popcntb(Register a, Register s)             { emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); };
-inline void Assembler::popcntw(Register a, Register s)             { emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); };
+inline void Assembler::cmpb(   Register a, Register s, Register b) { guarantee(VM_Version::has_cmpb(), "opcode not supported on this hardware");
+                                                                     emit_int32( CMPB_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::popcntb(Register a, Register s)             { guarantee(VM_Version::has_popcntb(), "opcode not supported on this hardware");
+                                                                     emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); };
+inline void Assembler::popcntw(Register a, Register s)             { guarantee(VM_Version::has_popcntw(), "opcode not supported on this hardware");
+                                                                     emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); };
 inline void Assembler::popcntd(Register a, Register s)             { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); };

 inline void Assembler::fneg(  FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE  | frt(d) | frb(b) | rc(0)); }
@ -568,14 +572,17 @@ inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FC
 inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE  | frt(d) | frb(b) | rc(0)); }
 inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); }
 inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE  | frt(d) | frb(b) | rc(0)); }
-inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fcfids(), "opcode not supported on this hardware");
+                                                                  emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); }

 // PPC 1, section 4.6.7 Floating-Point Compare Instructions
 inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); }

 // PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions
-inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { emit_int32( FSQRT_OPCODE  | frt(d) | frb(b) | rc(0)); }
-inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrt(), "opcode not supported on this hardware");
+                                                                  emit_int32( FSQRT_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { guarantee(VM_Version::has_fsqrts(), "opcode not supported on this hardware");
+                                                                  emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); }

 // Vector instructions for >= Power6.
 inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE  | vrt(d) | ra0mem(s1) | rb(s2)); }
@ -703,7 +710,8 @@ inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegist
 inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
 inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
 inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
-inline void Assembler::vand(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAND_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vand(    VectorRegister d, VectorRegister a, VectorRegister b) { guarantee(VM_Version::has_vand(), "opcode not supported on this hardware");
+                                                                                        emit_int32( VAND_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vandc(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vnor(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
 inline void Assembler::vor(     VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE      | vrt(d) | vra(a) | vrb(b)); }
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -87,7 +87,7 @@ define_pd_global(uint64_t,MaxRAM,                    4ULL*G);
 define_pd_global(uintx, CodeCacheMinBlockLength,     4);
 define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);

-define_pd_global(bool,  TrapBasedRangeChecks,        false);
+define_pd_global(bool,  TrapBasedRangeChecks,        true);

 // Heap related flags
 define_pd_global(uintx,MetaspaceSize,                ScaleForWordSize(16*M));
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,8 +24,6 @@
 */

 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "compiler/disassembler.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
@ -1120,7 +1118,7 @@ address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
  }
  return _last_calls_return_pc;
 }
-#endif
+#endif // ABI_ELFv2

 void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
@ -1794,7 +1792,7 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
  cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
  bne(cr_reg, cas_label);

-  load_klass_with_trap_null_check(temp_reg, obj_reg);
+  load_klass(temp_reg, obj_reg);

  load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));
  ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
@ -1891,7 +1889,7 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
  // the bias from one thread to another directly in this situation.
  andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
  orr(temp_reg, R16_thread, temp_reg);
-  load_klass_with_trap_null_check(temp2_reg, obj_reg);
+  load_klass(temp2_reg, obj_reg);
  ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
  orr(temp_reg, temp_reg, temp2_reg);

@ -1927,7 +1925,7 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
-  load_klass_with_trap_null_check(temp_reg, obj_reg);
+  load_klass(temp_reg, obj_reg);
  ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
  andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
  orr(temp_reg, temp_reg, temp2_reg);
@ -2213,8 +2211,7 @@ void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Regis
  stbx(R0, Rtmp, Robj);
 }

-#ifndef SERIALGC
-
+#if INCLUDE_ALL_GCS
 // General G1 pre-barrier generator.
 // Goal: record the previous value if it is not null.
 void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
@ -2328,14 +2325,17 @@ void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_v

  // Get the address of the card.
  lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr);
+  cmpwi(CCR0, Rtmp3, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  beq(CCR0, filtered);

-  assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
-  cmpwi(CCR0, Rtmp3 /* card value */, 0);
+  membar(Assembler::StoreLoad);
+  lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr);  // Reload after membar.
+  cmpwi(CCR0, Rtmp3 /* card value */, CardTableModRefBS::dirty_card_val());
  beq(CCR0, filtered);

  // Storing a region crossing, non-NULL oop, card is clean.
  // Dirty card and log.
-  li(Rtmp3, 0); // dirty
+  li(Rtmp3, CardTableModRefBS::dirty_card_val());
  //release(); // G1: oops are allowed to get visible after dirty marking.
  stbx(Rtmp3, Rbase, Rcard_addr);

@ -2362,7 +2362,7 @@ void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_v

  bind(filtered_int);
 }
-#endif // SERIALGC
+#endif // INCLUDE_ALL_GCS

 // Values for last_Java_pc, and last_Java_sp must comply to the rules
 // in frame_ppc64.hpp.
@ -2453,7 +2453,8 @@ void MacroAssembler::get_vm_result_2(Register metadata_result) {
 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
  if (Universe::narrow_klass_base() != 0) {
-    load_const(R0, Universe::narrow_klass_base(), (dst != current) ? dst : noreg); // Use dst as temp if it is free.
+    // Use dst as temp if it is free.
+    load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
    sub(dst, current, R0);
    current = dst;
  }
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp
@ -514,14 +514,14 @@ class MacroAssembler: public Assembler {
  void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
  void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);

-#ifndef SERIALGC
+#if INCLUDE_ALL_GCS
  // General G1 pre-barrier generator.
  void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
                            Register Rtmp1, Register Rtmp2, bool needs_frame = false);
  // General G1 post-barrier generator
  void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1,
                             Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL);
-#endif // SERIALGC
+#endif // INCLUDE_ALL_GCS

  // Support for managing the JavaThread pointer (i.e.; the reference to
  // thread-local information).
--- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -119,6 +119,7 @@ void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Registe

 void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp,
                                            bool for_compiler_entry) {
+  Label L_no_such_method;
  assert(method == R19_method, "interpreter calling convention");
  assert_different_registers(method, target, temp);

@ -131,17 +132,31 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth
    __ lwz(temp, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
    __ cmplwi(CCR0, temp, 0);
    __ beq(CCR0, run_compiled_code);
+    // Null method test is replicated below in compiled case,
+    // it might be able to address across the verify_thread()
+    __ cmplwi(CCR0, R19_method, 0);
+    __ beq(CCR0, L_no_such_method);
    __ ld(target, in_bytes(Method::interpreter_entry_offset()), R19_method);
    __ mtctr(target);
    __ bctr();
    __ BIND(run_compiled_code);
  }

+  // Compiled case, either static or fall-through from runtime conditional
+  __ cmplwi(CCR0, R19_method, 0);
+  __ beq(CCR0, L_no_such_method);
+
  const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
                                                     Method::from_interpreted_offset();
  __ ld(target, in_bytes(entry_offset), R19_method);
  __ mtctr(target);
  __ bctr();
+
+  __ bind(L_no_such_method);
+  assert(StubRoutines::throw_AbstractMethodError_entry() != NULL, "not yet generated!");
+  __ load_const_optimized(target, StubRoutines::throw_AbstractMethodError_entry());
+  __ mtctr(target);
+  __ bctr();
 }


--- a/hotspot/src/cpu/ppc/vm/ppc.ad
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad
@ -891,6 +891,13 @@ definitions %{
 // This is a block of C++ code which provides values, functions, and
 // definitions necessary in the rest of the architecture description.
 source_hpp %{
+  // Header information of the source block.
+  // Method declarations/definitions which are used outside
+  // the ad-scope can conveniently be defined here.
+  //
+  // To keep related declarations/definitions/uses close together,
+  // we switch between source %{ %} and source_hpp %{ %} freely as needed.
+
  // Returns true if Node n is followed by a MemBar node that 
  // will do an acquire. If so, this node must not do the acquire
  // operation.
@ -1114,6 +1121,40 @@ static inline void emit_long(CodeBuffer &cbuf, int value) {

 //=============================================================================

+%} // interrupt source
+
+source_hpp %{ // Header information of the source block.
+
+//--------------------------------------------------------------
+//---<  Used for optimization in Compile::Shorten_branches  >---
+//--------------------------------------------------------------
+
+const uint trampoline_stub_size     =  6 * BytesPerInstWord;
+
+class CallStubImpl {  // Static-only holder for the call trampoline stub emitter and its size/relocation figures.
+
+ public:
+
+  static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);  // Declared here; the body is defined in a source %{ %} block.
+
+  // Size of call trampoline stub.
+  // This doesn't need to be accurate to the byte, but it
+  // must be larger than or equal to the real size of the stub.
+  static uint size_call_trampoline() {
+    return trampoline_stub_size;  // constant declared just above this class (6 * BytesPerInstWord)
+  }
+
+  // Number of relocations needed by a call trampoline stub.
+  static uint reloc_call_trampoline() {
+    return 5;  // NOTE(review): presumably has to cover the relocations emit_trampoline_stub() creates - confirm
+  }
+
+};
+
+%} // end source_hpp
+
+source %{
+
 // Emit a trampoline stub for a call to a target which is too far away.
 //
 // code sequences:
@ -1125,9 +1166,7 @@ static inline void emit_long(CodeBuffer &cbuf, int value) {
 //   load the call target from the constant pool
 //   branch via CTR (LR/link still points to the call-site above)

-const uint trampoline_stub_size = 6 * BytesPerInstWord;
-
-void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
+void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
  // Start the stub.
  address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
  if (stub == NULL) {
@ -1170,19 +1209,6 @@ void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int
  __ end_a_stub();
 }

-// Size of trampoline stub, this doesn't need to be accurate but it must
-// be larger or equal to the real size of the stub.
-// Used for optimization in Compile::Shorten_branches.
-uint size_call_trampoline() {
-  return trampoline_stub_size;
-}
-
-// Number of relocation entries needed by trampoline stub.
-// Used for optimization in Compile::Shorten_branches.
-uint reloc_call_trampoline() {
-  return 5;
-}
-
 //=============================================================================

 // Emit an inline branch-and-link call and a related trampoline stub.
@ -1221,7 +1247,7 @@ EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address en
    const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

    // Emit the trampoline stub which will be related to the branch-and-link below.
-    emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
+    CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
    __ relocate(rtype);
  }

@ -2023,17 +2049,34 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {

 //=============================================================================

-uint size_exception_handler() {
-  // The exception_handler is a b64_patchable.
-  return MacroAssembler::b64_patchable_size;
-}
+%} // interrupt source

-uint size_deopt_handler() {
-  // The deopt_handler is a bl64_patchable.
-  return MacroAssembler::bl64_patchable_size;
-}
+source_hpp %{ // Header information of the source block.

-int emit_exception_handler(CodeBuffer &cbuf) {
+class HandlerImpl {  // Static-only holder for the exception/deopt handler emitters and their stub sizes; lives in source_hpp so other files can call it (sharedRuntime_ppc.cpp uses size_deopt_handler()).
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);  // declared here, defined in a source %{ %} block
+  static int emit_deopt_handler(CodeBuffer& cbuf);  // declared here, defined in a source %{ %} block
+
+  static uint size_exception_handler() {  // value handed to start_a_stub() by emit_exception_handler()
+    // The exception_handler is a b64_patchable.
+    return MacroAssembler::b64_patchable_size;
+  }
+
+  static uint size_deopt_handler() {  // value handed to start_a_stub() by emit_deopt_handler(); also negated as a return-pc adjustment in generate_deopt_blob()
+    // The deopt_handler is a bl64_patchable.
+    return MacroAssembler::bl64_patchable_size;
+  }
+
+};
+
+%} // end source_hpp
+
+source %{
+
+int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
@ -2050,7 +2093,7 @@ int emit_exception_handler(CodeBuffer &cbuf) {

 // The deopt_handler is like the exception handler, but it calls to
 // the deoptimization blob instead of jumping to the exception blob.
-int emit_deopt_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
@ -3438,7 +3481,7 @@ encode %{
        const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

        // Emit the trampoline stub which will be related to the branch-and-link below.
-        emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
+        CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
        __ relocate(_optimized_virtual ?
                    relocInfo::opt_virtual_call_type : relocInfo::static_call_type);
      }
@ -3481,7 +3524,7 @@ encode %{
      const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);

      // Emit the trampoline stub which will be related to the branch-and-link below.
-      emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
+      CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
      assert(_optimized_virtual, "methodHandle call should be a virtual call");
      __ relocate(relocInfo::opt_virtual_call_type);
    }
@ -3531,7 +3574,7 @@ encode %{
      const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
      const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
      const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
-      emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
+      CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());

      if (ra_->C->env()->failing())
        return;
@ -8755,6 +8798,7 @@ instruct sqrtD_reg(regD dst, regD src) %{
 // Single-precision sqrt.
 instruct sqrtF_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  predicate(VM_Version::has_fsqrts());
  ins_cost(DEFAULT_COST);

  format %{ "FSQRTS  $dst, $src" %}
@ -11550,8 +11594,7 @@ instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
  // effect no longer needs to be mentioned, since r0 is not contained
  // in a reg_class.

-  format %{ "LD      R12, addr of polling page\n\t"
-            "LD      R0, #0, R12 \t// Safepoint poll for GC" %}
+  format %{ "LD      R0, #0, R12 \t// Safepoint poll for GC" %}
  ins_encode( enc_poll(0x0, poll) );
  ins_pipe(pipe_class_default);
 %}
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp
@ -34,6 +34,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/vframeArray.hpp"
 #include "vmreg_ppc.inline.hpp"
+#include "adfiles/ad_ppc_64.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
@ -52,10 +53,6 @@
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")


-// Used by generate_deopt_blob.  Defined in .ad file.
-extern uint size_deopt_handler();
-
-
 class RegisterSaver {
 // Used for saving volatile registers.
 public:
@ -2782,7 +2779,7 @@ void SharedRuntime::generate_deopt_blob() {
  // We can't grab a free register here, because all registers may
  // contain live values, so let the RegisterSaver do the adjustment
  // of the return pc.
-  const int return_pc_adjustment_no_exception = -size_deopt_handler();
+  const int return_pc_adjustment_no_exception = -HandlerImpl::size_deopt_handler();

  // Push the "unpack frame"
  // Save everything in sight.
--- a/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp
+++ b/hotspot/src/cpu/ppc/vm/stubRoutines_ppc_64.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -23,17 +23,6 @@
 *
 */

-#include "precompiled.hpp"
-#include "runtime/deoptimization.hpp"
-#include "runtime/frame.inline.hpp"
-#include "runtime/stubRoutines.hpp"
-#ifdef TARGET_OS_FAMILY_aix
-# include "thread_aix.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_linux
-# include "thread_linux.inline.hpp"
-#endif
-
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.

--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -402,6 +402,9 @@ void VM_Version::determine_features() {
  CodeBuffer cb("detect_cpu_features", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);

+  // Must be set to true so we can generate the test code.
+  _features = VM_Version::all_features_m;
+
  // Emit code.
  void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
  uint32_t *code = (uint32_t *)a->pc();
@ -409,14 +412,15 @@ void VM_Version::determine_features() {
  // Keep R3_ARG1 unmodified, it contains &field (see below).
  // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
-  a->isel(R7, R5, R6, 0);                      // code[1] -> isel_m
-  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[2] -> lxarx_m
-  a->cmpb(R7, R5, R6);                         // code[3] -> bcmp
-  //a->mftgpr(R7, F3);                         // code[4] -> mftgpr
-  a->popcntb(R7, R5);                          // code[5] -> popcntb
-  a->popcntw(R7, R5);                          // code[6] -> popcntw
-  a->fcfids(F3, F4);                           // code[7] -> fcfids
-  a->vand(VR0, VR0, VR0);                      // code[8] -> vand
+  a->fsqrts(F3, F4);                           // code[1] -> fsqrts_m
+  a->isel(R7, R5, R6, 0);                      // code[2] -> isel_m
+  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
+  a->cmpb(R7, R5, R6);                         // code[4] -> bcmp
+  //a->mftgpr(R7, F3);                         // code[5] -> mftgpr
+  a->popcntb(R7, R5);                          // code[6] -> popcntb
+  a->popcntw(R7, R5);                          // code[7] -> popcntw
+  a->fcfids(F3, F4);                           // code[8] -> fcfids
+  a->vand(VR0, VR0, VR0);                      // code[9] -> vand
  a->blr();

  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -426,6 +430,7 @@ void VM_Version::determine_features() {

  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();
+  _features = VM_Version::unknown_m;

  // Print the detection code.
  if (PrintAssembly) {
@ -450,6 +455,7 @@ void VM_Version::determine_features() {
  // determine which instructions are legal.
  int feature_cntr = 0;
  if (code[feature_cntr++]) features |= fsqrt_m;
+  if (code[feature_cntr++]) features |= fsqrts_m;
  if (code[feature_cntr++]) features |= isel_m;
  if (code[feature_cntr++]) features |= lxarxeh_m;
  if (code[feature_cntr++]) features |= cmpb_m;
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,7 @@ class VM_Version: public Abstract_VM_Version {
 protected:
  enum Feature_Flag {
    fsqrt,
+    fsqrts,
    isel,
    lxarxeh,
    cmpb,
@ -46,6 +47,7 @@ protected:
  enum Feature_Flag_Set {
    unknown_m             = 0,
    fsqrt_m               = (1 << fsqrt  ),
+    fsqrts_m              = (1 << fsqrts ),
    isel_m                = (1 << isel   ),
    lxarxeh_m             = (1 << lxarxeh),
    cmpb_m                = (1 << cmpb   ),
@ -72,6 +74,7 @@ public:
  static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
  // CPU instruction support
  static bool has_fsqrt()   { return (_features & fsqrt_m) != 0; }
+  static bool has_fsqrts()  { return (_features & fsqrts_m) != 0; }
  static bool has_isel()    { return (_features & isel_m) != 0; }
  static bool has_lxarxeh() { return (_features & lxarxeh_m) !=0; }
  static bool has_cmpb()    { return (_features & cmpb_m) != 0; }
--- a/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp
+++ b/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -79,7 +79,7 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
  address npe_addr = __ pc(); // npe = null pointer exception
  __ load_klass_with_trap_null_check(rcvr_klass, R3);

- // Set methodOop (in case of interpreted method), and destination address.
+ // Set method (in case of interpreted method), and destination address.
  int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();

 #ifndef PRODUCT
@ -161,8 +161,6 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
  address npe_addr = __ pc(); // npe = null pointer exception
  __ load_klass_with_trap_null_check(rcvr_klass, R3_ARG1);

-  //__ ld(rcvr_klass, oopDesc::klass_offset_in_bytes(), R3_ARG1);
-
  BLOCK_COMMENT("Load start of itable entries into itable_entry.");
  __ lwz(vtable_len, InstanceKlass::vtable_length_offset() * wordSize, rcvr_klass);
  __ slwi(vtable_len, vtable_len, exact_log2(vtableEntry::size() * wordSize));
@ -199,7 +197,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
                                   itable_offset_search_inc;
  __ lwz(vtable_offset, vtable_offset_offset, itable_entry_addr);

-  // Compute itableMethodEntry and get methodOop and entry point for compiler.
+  // Compute itableMethodEntry and get method and entry point for compiler.
  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) +
    itableMethodEntry::method_offset_in_bytes();

@ -211,7 +209,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
    Label ok;
    __ cmpd(CCR0, R19_method, 0);
    __ bne(CCR0, ok);
-    __ stop("methodOop is null", 103);
+    __ stop("method is null", 103);
    __ bind(ok);
  }
 #endif
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
@ -3320,7 +3320,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest,

  // if tmp is invalid, then the function being called doesn't destroy the thread
  if (tmp->is_valid()) {
-    __ save_thread(tmp->as_register());
+    __ save_thread(tmp->as_pointer_register());
  }
  __ call(dest, relocInfo::runtime_call_type);
  __ delayed()->nop();
@ -3328,7 +3328,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest,
    add_call_info_here(info);
  }
  if (tmp->is_valid()) {
-    __ restore_thread(tmp->as_register());
+    __ restore_thread(tmp->as_pointer_register());
  }

 #ifdef ASSERT
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
@ -69,7 +69,7 @@ void LIRItem::load_nonconstant() {
 LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::Oexception_opr;  }
 LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::Oissuing_pc_opr; }
 LIR_Opr LIRGenerator::syncTempOpr()                  { return new_register(T_OBJECT); }
-LIR_Opr LIRGenerator::getThreadTemp()                { return rlock_callee_saved(T_INT); }
+LIR_Opr LIRGenerator::getThreadTemp()                { return rlock_callee_saved(NOT_LP64(T_INT) LP64_ONLY(T_LONG)); }  // requests a T_LONG register on LP64 builds, T_INT otherwise

 LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
  LIR_Opr opr;
--- a/hotspot/src/cpu/sparc/vm/c1_globals_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/c1_globals_sparc.hpp
@ -66,6 +66,4 @@ define_pd_global(bool, OptimizeSinglePrecision,      false);
 define_pd_global(bool, CSEArrayLength,               true );
 define_pd_global(bool, TwoOperandLIRForm,            false);

-define_pd_global(intx, SafepointPollOffset,          0    );
-
 #endif // CPU_SPARC_VM_C1_GLOBALS_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@ -457,6 +457,13 @@ definitions %{
 // This is a block of C++ code which provides values, functions, and
 // definitions necessary in the rest of the architecture description
 source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ %} and source_hpp %{ %} freely as needed.
+
 // Must be visible to the DFA in dfa_sparc.cpp
 extern bool can_branch_register( Node *bol, Node *cmp );

@ -468,6 +475,46 @@ extern bool use_block_zeroing(Node* count);
 #define LONG_HI_REG(x) (x)
 #define LONG_LO_REG(x) (x)

+class CallStubImpl {  // Static-only holder for the call trampoline size/relocation figures; both are zero on this platform.
+
+  //--------------------------------------------------------------
+  //---<  Used for optimization in Compile::Shorten_branches  >---
+  //--------------------------------------------------------------
+
+ public:
+  // Size of call trampoline stub.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // Number of relocations needed by a call trampoline stub.
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+class HandlerImpl {  // Static-only holder for the exception/deopt handler emitters and their size estimates.
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);  // declared here, defined in the source %{ %} block
+  static int emit_deopt_handler(CodeBuffer& cbuf);  // declared here, defined in the source %{ %} block
+
+  static uint size_exception_handler() {  // size estimate for the exception handler stub
+    if (TraceJumps) {  // NOTE(review): presumably the handler grows when TraceJumps is on - confirm
+      return (400); // just a guess
+    }
+    return ( NativeJump::instruction_size ); // sethi;jmp;nop
+  }
+
+  static uint size_deopt_handler() {  // size estimate for the deopt handler stub
+    if (TraceJumps) {  // same fixed estimate as the exception handler when TraceJumps is on
+      return (400); // just a guess
+    }
+    return ( 4+  NativeJump::instruction_size ); // save;sethi;jmp;restore (4 bytes more than the exception handler)
+  }
+};
+
 %}

 source %{
@ -1710,22 +1757,9 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {

 //=============================================================================

-uint size_exception_handler() {
-  if (TraceJumps) {
-    return (400); // just a guess
-  }
-  return ( NativeJump::instruction_size ); // sethi;jmp;nop
-}
-
-uint size_deopt_handler() {
-  if (TraceJumps) {
-    return (400); // just a guess
-  }
-  return ( 4+  NativeJump::instruction_size ); // save;sethi;jmp;restore
-}

 // Emit exception handler code.
-int emit_exception_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
  Register temp_reg = G3;
  AddressLiteral exception_blob(OptoRuntime::exception_blob()->entry_point());
  MacroAssembler _masm(&cbuf);
@ -1746,7 +1780,7 @@ int emit_exception_handler(CodeBuffer& cbuf) {
  return offset;
 }

-int emit_deopt_handler(CodeBuffer& cbuf) {
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  // Can't use any of the current frame's registers as we may have deopted
  // at a poll and everything (including G3) can be live.
  Register temp_reg = L0;
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
@ -1112,7 +1112,6 @@ void Assembler::bsfl(Register dst, Register src) {
 }

 void Assembler::bsrl(Register dst, Register src) {
-  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
@ -2343,6 +2342,11 @@ void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector25
  emit_int8(imm8);
 }

+void Assembler::pause() {  // Emits the operand-less two-byte sequence F3 90 (the x86 PAUSE encoding).
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)0x90);
+}
+
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
@ -2667,6 +2671,11 @@ void Assembler::rcll(Register dst, int imm8) {
  }
 }

+void Assembler::rdtsc() {  // Emits the operand-less two-byte sequence 0F 31 (the x86 RDTSC encoding).
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0x31);
+}
+
 // copies data from [esi] to [edi] using rcx pointer sized words
 // generic
 void Assembler::rep_mov() {
@ -2976,6 +2985,11 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }

+void Assembler::xabort(int8_t imm8) {  // Emits C6 F8 followed by one immediate byte (the x86 XABORT imm8 encoding).
+  emit_int8((unsigned char)0xC6);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(imm8 & 0xFF));  // immediate operand, masked to its low 8 bits before the cast
+}

 void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
@ -2985,6 +2999,24 @@ void Assembler::xaddl(Address dst, Register src) {
  emit_operand(src, dst);
 }

+void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {  // Emits C7 F8 plus a 32-bit displacement (x86 XBEGIN rel32) whose target is the label `abort`.
+  InstructionMark im(this);
+  relocate(rtype);  // attach relocation info of the caller-chosen type (declaration defaults it to relocInfo::none)
+  if (abort.is_bound()) {  // label already has an address: encode the real displacement now
+    address entry = target(abort);
+    assert(entry != NULL, "abort entry NULL");
+    intptr_t offset = entry - pc();  // pc() is read before the opcode bytes are emitted, i.e. at the instruction start
+    emit_int8((unsigned char)0xC7);
+    emit_int8((unsigned char)0xF8);
+    emit_int32(offset - 6); // 2 opcode + 4 address: subtract the 6-byte instruction length so the value is relative to the instruction end
+  } else {  // label not bound yet: register this site with the label and emit a placeholder
+    abort.add_patch_at(code(), locator());  // called before the opcode bytes are emitted, so the recorded location is the instruction start
+    emit_int8((unsigned char)0xC7);
+    emit_int8((unsigned char)0xF8);
+    emit_int32(0);  // zero placeholder; NOTE(review): presumably rewritten when the label is bound - confirm the patching code handles this opcode
+  }
+}
+
 void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
@ -2998,6 +3030,12 @@ void Assembler::xchgl(Register dst, Register src) {
  emit_int8((unsigned char)(0xC0 | encode));
 }

+void Assembler::xend() {  // Emits the operand-less three-byte sequence 0F 01 D5 (the x86 XEND encoding).
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)0x01);
+  emit_int8((unsigned char)0xD5);
+}
+
 void Assembler::xgetbv() {
  emit_int8(0x0F);
  emit_int8(0x01);
@ -4938,7 +4976,6 @@ void Assembler::bsfq(Register dst, Register src) {
 }

 void Assembler::bsrq(Register dst, Register src) {
-  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
@ -1451,6 +1451,8 @@ private:
  // Permutation of 64bit words
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);

+  void pause();
+
  // SSE4.2 string instructions
  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);
@ -1535,6 +1537,8 @@ private:

  void rclq(Register dst, int imm8);

+  void rdtsc();
+
  void ret(int imm16);

  void sahf();
@ -1632,16 +1636,22 @@ private:
  void ucomiss(XMMRegister dst, Address src);
  void ucomiss(XMMRegister dst, XMMRegister src);

+  void xabort(int8_t imm8);
+
  void xaddl(Address dst, Register src);

  void xaddq(Address dst, Register src);

+  void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
+
  void xchgl(Register reg, Address adr);
  void xchgl(Register dst, Register src);

  void xchgq(Register reg, Address adr);
  void xchgq(Register dst, Register src);

+  void xend();
+
  // Get Value of Extended Control Register
  void xgetbv();

--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@ -604,8 +604,7 @@ void LIR_Assembler::return_op(LIR_Opr result) {

  // Note: we do not need to round double result; float result has the right precision
  // the poll sets the condition code, but no data registers
-  AddressLiteral polling_page(os::get_polling_page() + (SafepointPollOffset % os::vm_page_size()),
-                              relocInfo::poll_return_type);
+  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);

  if (Assembler::is_polling_page_far()) {
    __ lea(rscratch1, polling_page);
@ -619,8 +618,7 @@ void LIR_Assembler::return_op(LIR_Opr result) {


 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
-  AddressLiteral polling_page(os::get_polling_page() + (SafepointPollOffset % os::vm_page_size()),
-                              relocInfo::poll_type);
+  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
  guarantee(info != NULL, "Shouldn't be NULL");
  int offset = __ offset();
  if (Assembler::is_polling_page_far()) {
--- a/hotspot/src/cpu/x86/vm/c1_globals_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/c1_globals_x86.hpp
@ -65,6 +65,4 @@ define_pd_global(bool, OptimizeSinglePrecision,      true );
 define_pd_global(bool, CSEArrayLength,               false);
 define_pd_global(bool, TwoOperandLIRForm,            true );

-define_pd_global(intx, SafepointPollOffset,          256  );
-
 #endif // CPU_X86_VM_C1_GLOBALS_X86_HPP
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp
@ -129,6 +129,42 @@ define_pd_global(uintx, TypeProfileLevel, 111);
  product(bool, UseFastStosb, false,                                        \
          "Use fast-string operation for zeroing: rep stosb")               \
                                                                            \
+  /* Use Restricted Transactional Memory for lock eliding */                \
+  product(bool, UseRTMLocking, false,                                       \
+          "Enable RTM lock eliding for inflated locks in compiled code")    \
+                                                                            \
+  experimental(bool, UseRTMForStackLocks, false,                            \
+          "Enable RTM lock eliding for stack locks in compiled code")       \
+                                                                            \
+  product(bool, UseRTMDeopt, false,                                         \
+          "Perform deopt and recompilation based on RTM abort ratio")       \
+                                                                            \
+  product(uintx, RTMRetryCount, 5,                                          \
+          "Number of RTM retries on lock abort or busy")                    \
+                                                                            \
+  experimental(intx, RTMSpinLoopCount, 100,                                 \
+          "Spin count for lock to become free before RTM retry")            \
+                                                                            \
+  experimental(intx, RTMAbortThreshold, 1000,                               \
+          "Calculate abort ratio after this number of aborts")              \
+                                                                            \
+  experimental(intx, RTMLockingThreshold, 10000,                            \
+          "Lock count at which to do RTM lock eliding without "             \
+          "abort ratio calculation")                                        \
+                                                                            \
+  experimental(intx, RTMAbortRatio, 50,                                     \
+          "Lock abort ratio at which to stop use RTM lock eliding")         \
+                                                                            \
+  experimental(intx, RTMTotalCountIncrRate, 64,                             \
+          "Increment total RTM attempted lock count once every n times")    \
+                                                                            \
+  experimental(intx, RTMLockingCalculationDelay, 0,                         \
+          "Number of milliseconds to wait before start calculating aborts " \
+          "for RTM locking")                                                \
+                                                                            \
+  experimental(bool, UseRTMXendForLockBusy, false,                          \
+          "Use RTM Xend instead of Xabort when lock busy")                  \
+                                                                            \
  /* assembler */                                                           \
  product(bool, Use486InstrsOnly, false,                                    \
          "Use 80486 Compliant instruction subset")                         \
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
@ -301,7 +301,9 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 }

-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
+  // scratch register is not used,
+  // it is defined to match parameters of 64-bit version of this method.
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
@ -613,6 +615,15 @@ void MacroAssembler::decrementq(Address dst, int value) {
  /* else */      { subq(dst, value)       ; return; }
 }

+void MacroAssembler::incrementq(AddressLiteral dst) { // emit incrementq on the memory word named by a literal address
+  if (reachable(dst)) {
+    incrementq(as_Address(dst)); // dst passed the reachable() check: use it directly as a memory operand
+  } else {
+    lea(rscratch1, dst); // otherwise load the address into rscratch1 first (rscratch1 is overwritten)
+    incrementq(Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
@ -681,15 +692,15 @@ void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  movq(dst, rscratch1);
 }

-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { // scratch is only written on the not-reachable path below
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
-      lea(rscratch1, src);
-      movq(dst, Address(rscratch1,0));
+      lea(scratch, src); // caller-chosen scratch replaces the previously hard-coded rscratch1
+      movq(dst, Address(scratch, 0)); // load the value through the address just materialized
    }
  }
 }
@ -988,21 +999,38 @@ void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
 }

-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
-  pushf();
-  if (reachable(counter_addr)) {
-    if (os::is_MP())
-      lock();
-    incrementl(as_Address(counter_addr));
-  } else {
-    lea(rscratch1, counter_addr);
-    if (os::is_MP())
-      lock();
-    incrementl(Address(rscratch1, 0));
-  }
-  popf();
+void MacroAssembler::atomic_incl(Address counter_addr) { // increment the 32-bit counter at counter_addr; no pushf/popf here, unlike the removed version
+  if (os::is_MP()) // lock prefix is emitted only when os::is_MP() is true
+    lock();
+  incrementl(counter_addr); // flags are not saved: callers that need them must pushf/popf around the call
 }

+void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { // literal-address form; scr is written only on the not-reachable path
+  if (reachable(counter_addr)) {
+    atomic_incl(as_Address(counter_addr)); // address passed the reachable() check: use it directly
+  } else {
+    lea(scr, counter_addr); // materialize the address in the caller-supplied scratch register
+    atomic_incl(Address(scr, 0));
+  }
+}
+
+#ifdef _LP64
+void MacroAssembler::atomic_incq(Address counter_addr) { // same shape as atomic_incl(Address), but emits incrementq
+  if (os::is_MP()) // lock prefix is emitted only when os::is_MP() is true
+    lock();
+  incrementq(counter_addr); // flags are not saved by this helper
+}
+
+void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) { // literal-address form; scr is written only on the not-reachable path
+  if (reachable(counter_addr)) {
+    atomic_incq(as_Address(counter_addr)); // address passed the reachable() check: use it directly
+  } else {
+    lea(scr, counter_addr); // materialize the address in the caller-supplied scratch register
+    atomic_incq(Address(scr, 0));
+  }
+}
+#endif
+
 // Writes to stack successive pages until offset reached to check for
 // stack overflow + shadow pages.  This clobbers tmp.
 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
@ -1274,6 +1302,325 @@ void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, La
 }

 #ifdef COMPILER2
+
+#if INCLUDE_RTM_OPT
+
+// Emit code that increments abort_count and, when PrintPreciseRTMLockingStatistics is set,
+// one abortX counter for every bit i < ABORT_STATUS_LIMIT that is set in abort_status.
+// input: abort_status, rtm_counters (RTMLockingCounters*); neither register is written here
+// flags are killed
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
+
+  atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset())); // every abort counts, whatever its status bits
+  if (PrintPreciseRTMLockingStatistics) {
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { // unrolled at code-generation time: one test/branch/increment per bit
+      Label check_abort;
+      testl(abort_status, (1<<i));
+      jccb(Assembler::equal, check_abort); // bit i clear: skip this counter
+      atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx)))); // i-th uintx slot after abortX_count_offset()
+      bind(check_abort);
+    }
+  }
+}
+
+// Branch to brLabel if ((random & (count-1)) != 0), where random is what rdtsc leaves in tmp; count is 2^n
+// tmp, scr and flags are killed (tmp must be rax and scr must be rdx, see asserts)
+void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
+  assert(tmp == rax, "");
+  assert(scr == rdx, "");
+  rdtsc(); // modifies EDX:EAX
+  andptr(tmp, count-1); // count-1 is used as a bit mask, hence the power-of-two requirement
+  jccb(Assembler::notZero, brLabel); // falls through only when all masked bits are zero
+}
+
+// Perform abort ratio calculation: set NoRTM in the MDO if the ratio is high, else set UseRTM once total_count is large enough
+// input:  rtm_counters_Reg (RTMLockingCounters* address)
+// tmpReg, rtm_counters_Reg and flags are killed
+void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
+                                                 Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation: skip everything while the calculation flag is still zero
+    movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::equal, L_done);
+  }
+  // Abort ratio calculation only if abort_count >= RTMAbortThreshold
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count * RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
+  cmpptr(tmpReg, RTMAbortThreshold);
+  jccb(Assembler::below, L_check_always_rtm2); // too few aborts; rtm_counters_Reg is still intact on this path
+  imulptr(tmpReg, tmpReg, 100);
+
+  Register scrReg = rtm_counters_Reg; // alias: the counters pointer is overwritten by the next load
+  movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
+  imulptr(scrReg, scrReg, RTMAbortRatio);
+  cmpptr(tmpReg, scrReg);
+  jccb(Assembler::below, L_check_always_rtm1); // ratio below the limit: go check the "always rtm" condition
+  if (method_data != NULL) {
+    // set rtm_state to "no rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
+  }
+  jmpb(L_done);
+  bind(L_check_always_rtm1);
+  // Reload RTMLockingCounters* address (it was overwritten through the scrReg alias above)
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
+  bind(L_check_always_rtm2);
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); // division happens at code-generation time; NOTE(review): assumes RTMTotalCountIncrRate != 0 - confirm flag validation
+  jccb(Assembler::below, L_done);
+  if (method_data != NULL) {
+    // set rtm_state to "always rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
+  }
+  bind(L_done);
+}
+
+// Update counters and, when profile_rtm is set, perform abort ratio calculation
+// input:  abort_status_Reg (preserved across the ratio calculation only when RTMRetryCount > 0)
+// rtm_counters_Reg, flags are killed
+void MacroAssembler::rtm_profiling(Register abort_status_Reg,
+                                   Register rtm_counters_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // update rtm counters based on rax value at abort
+  // reads abort_status_Reg, updates flags
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); // load the counters address for rtm_counters_update
+  rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
+  if (profile_rtm) {
+    // Save abort status because abort_status_Reg is used as tmpReg by the ratio calculation below.
+    if (RTMRetryCount > 0) {
+      push(abort_status_Reg);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); // NOTE(review): repeats the assert at function entry
+    rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
+    // restore abort status
+    if (RTMRetryCount > 0) {
+      pop(abort_status_Reg);
+    }
+  }
+}
+
+// Retry on abort if abort status has any bit of 0x6 set: can retry (0x2) | memory conflict (0x4)
+// inputs: retry_count_Reg
+//       : abort_status_Reg (must be rax; overwritten with status & 0x6)
+// output: retry_count_Reg decremented by 1 when the retry is taken
+// flags are killed
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
+  Label doneRetry;
+  assert(abort_status_Reg == rax, "");
+  // The abort reason bits are in eax (see all states in rtmLocking.hpp)
+  // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
+  // if reason is in 0x6 and retry count != 0 then retry
+  andptr(abort_status_Reg, 0x6);
+  jccb(Assembler::zero, doneRetry); // neither bit set: do not retry
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry); // retry budget exhausted
+  pause();
+  decrementl(retry_count_Reg);
+  jmp(retryLabel);
+  bind(doneRetry); // falls through to the caller's code that follows
+}
+
+// Spin and retry if lock is busy,
+// inputs: box_Reg (monitor address, still carrying the monitor_value tag)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1 when a retry is taken
+//       : clear z flag if retry count exceeded (retry_count_Reg is incremented on that path)
+// tmp_Reg, scr_Reg, flags are killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
+                                            Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
+  Label SpinLoop, SpinExit, doneRetry;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry); // no retries left
+  decrementl(retry_count_Reg);
+  movptr(scr_Reg, RTMSpinLoopCount); // spin budget for this retry
+
+  bind(SpinLoop);
+  pause();
+  decrementl(scr_Reg);
+  jccb(Assembler::lessEqual, SpinExit); // spin budget used up: retry anyway
+  movptr(tmp_Reg, Address(box_Reg, owner_offset));
+  testptr(tmp_Reg, tmp_Reg);
+  jccb(Assembler::notZero, SpinLoop); // owner field still non-zero: keep spinning
+
+  bind(SpinExit);
+  jmp(retryLabel);
+  bind(doneRetry);
+  incrementl(retry_count_Reg); // clear z flag
+}
+
+// Use RTM for normal stack locks
+// Input: objReg (object to lock); tmpReg must be rax and scrReg must be rdx (see asserts)
+void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (!UseRTMXendForLockBusy) {
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort); // L_on_abort is the abort target passed to xbegin
+  movptr(tmpReg, Address(objReg, 0));       // fetch markword
+  andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+  cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
+  jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked (no xend is emitted on this path)
+
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (UseRTMXendForLockBusy) {
+    xend();
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+    movptr(abort_status_Reg, 0x1);                // Set the abort status to 1 (as xabort does)
+    jmp(L_decrement_retry);                       // skips the profiling code at L_on_abort
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
+  }
+  bind(L_decrement_retry);
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+}
+
+// Use RTM for inflating locks
+// inputs: objReg (object to lock)
+//         boxReg (on-stack box address (displaced header location) - KILLED)
+//         tmpReg (ObjectMonitor address + 2(monitor_value)); must be rax, scrReg must be rdx
+void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
+                                          Register scrReg, Register retry_on_busy_count_Reg,
+                                          Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Without cast to int32_t a movptr will destroy r10 which is typically obj
+  movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+  movptr(boxReg, tmpReg); // Save ObjectMonitor address
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort); // L_on_abort is the abort target passed to xbegin
+  movptr(tmpReg, Address(objReg, 0)); // reload markword; tmpReg was killed by the sampling code above
+  movptr(tmpReg, Address(tmpReg, owner_offset));
+  testptr(tmpReg, tmpReg);
+  jcc(Assembler::zero, DONE_LABEL); // owner field is zero: done (no xend is emitted on this path)
+  if (UseRTMXendForLockBusy) {
+    xend();
+    jmp(L_decrement_retry);
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
+  }
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  movptr(tmpReg, Address(boxReg, owner_offset)) ; // boxReg holds the saved monitor address from above
+  testptr(tmpReg, tmpReg) ;
+  jccb(Assembler::notZero, L_decrement_retry) ; // owner field non-zero: lock is busy
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+#ifdef _LP64
+  Register threadReg = r15_thread;
+#else
+  get_thread(scrReg);
+  Register threadReg = scrReg;
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    jccb(Assembler::equal, DONE_LABEL) ;
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
+  }
+  else {
+    bind(L_decrement_retry); // no retries configured: fall through with the flags as they are at this point
+  }
+}
+
+#endif //  INCLUDE_RTM_OPT
+
 // Fast_Lock and Fast_Unlock used by C2

 // Because the transitions from emitted code to the runtime
@ -1350,17 +1697,26 @@ void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, La
 // box: on-stack box address (displaced header location) - KILLED
 // rax,: tmp -- KILLED
 // scr: tmp -- KILLED
-void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
+                               Register scrReg, Register cx1Reg, Register cx2Reg,
+                               BiasedLockingCounters* counters,
+                               RTMLockingCounters* rtm_counters,
+                               RTMLockingCounters* stack_rtm_counters,
+                               Metadata* method_data,
+                               bool use_rtm, bool profile_rtm) {
  // Ensure the register assignents are disjoint
-  guarantee (objReg != boxReg, "");
-  guarantee (objReg != tmpReg, "");
-  guarantee (objReg != scrReg, "");
-  guarantee (boxReg != tmpReg, "");
-  guarantee (boxReg != scrReg, "");
-  guarantee (tmpReg == rax, "");
+  assert(tmpReg == rax, "");
+
+  if (use_rtm) {
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
+  } else {
+    assert(cx1Reg == noreg, "");
+    assert(cx2Reg == noreg, "");
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg);
+  }

  if (counters != NULL) {
-    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
  }
  if (EmitSync & 1) {
      // set box->dhw = unused_mark (3)
@ -1419,12 +1775,20 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
    }

+#if INCLUDE_RTM_OPT
+    if (UseRTMForStackLocks && use_rtm) {
+      rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
+                        stack_rtm_counters, method_data, profile_rtm,
+                        DONE_LABEL, IsInflated);
+    }
+#endif // INCLUDE_RTM_OPT
+
    movptr(tmpReg, Address(objReg, 0));          // [FETCH]
-    testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
-    jccb  (Assembler::notZero, IsInflated);
+    testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+    jccb(Assembler::notZero, IsInflated);

    // Attempt stack-locking ...
-    orptr (tmpReg, 0x1);
+    orptr (tmpReg, markOopDesc::unlocked_value);
    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
    if (os::is_MP()) {
      lock();
@ -1434,19 +1798,32 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
      cond_inc32(Assembler::equal,
                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
    }
-    jccb(Assembler::equal, DONE_LABEL);
+    jcc(Assembler::equal, DONE_LABEL);           // Success

-    // Recursive locking
+    // Recursive locking.
+    // The object is stack-locked: markword contains stack pointer to BasicLock.
+    // Locked by current thread if difference with current SP is less than one page.
    subptr(tmpReg, rsp);
+    // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
    movptr(Address(boxReg, 0), tmpReg);
    if (counters != NULL) {
      cond_inc32(Assembler::equal,
                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
    }
-    jmpb(DONE_LABEL);
+    jmp(DONE_LABEL);

    bind(IsInflated);
+    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
+
+#if INCLUDE_RTM_OPT
+    // Use the same RTM locking code in 32- and 64-bit VM.
+    if (use_rtm) {
+      rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
+                           rtm_counters, method_data, profile_rtm, DONE_LABEL);
+    } else {
+#endif // INCLUDE_RTM_OPT
+
 #ifndef _LP64
    // The object is inflated.
    //
@ -1576,7 +1953,7 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
    // Without cast to int32_t a movptr will destroy r10 which is typically obj
    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));

-    mov    (boxReg, tmpReg);
+    movptr (boxReg, tmpReg);
    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
    testptr(tmpReg, tmpReg);
    jccb   (Assembler::notZero, DONE_LABEL);
@ -1587,9 +1964,11 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
    }
    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
    // Intentional fall-through into DONE_LABEL ...
+#endif // _LP64

+#if INCLUDE_RTM_OPT
+    } // use_rtm()
 #endif
-
    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
@ -1631,11 +2010,9 @@ void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg
 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.

-void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
-  guarantee (objReg != boxReg, "");
-  guarantee (objReg != tmpReg, "");
-  guarantee (boxReg != tmpReg, "");
-  guarantee (boxReg == rax, "");
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
+  assert(boxReg == rax, "");
+  assert_different_registers(objReg, boxReg, tmpReg);

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
@ -1667,14 +2044,41 @@ void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpR
       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

-    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
-    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
-    jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+#if INCLUDE_RTM_OPT
+    if (UseRTMForStackLocks && use_rtm) {
+      assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+      Label L_regular_unlock;
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+      cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
+      jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
+      xend();                                       // otherwise end...
+      jmp(DONE_LABEL);                              // ... and we're done
+      bind(L_regular_unlock);
+    }
+#endif

-    testptr(tmpReg, 0x02);                          // Inflated?
+    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+    jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
+    testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
    jccb  (Assembler::zero, Stacked);

    // It's inflated.
+#if INCLUDE_RTM_OPT
+    if (use_rtm) {
+      Label L_regular_inflated_unlock;
+      // Clean monitor_value bit to get valid pointer
+      int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+      movptr(boxReg, Address(tmpReg, owner_offset));
+      testptr(boxReg, boxReg);
+      jccb(Assembler::notZero, L_regular_inflated_unlock);
+      xend();
+      jmpb(DONE_LABEL);
+      bind(L_regular_inflated_unlock);
+    }
+#endif
+
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
@ -2448,7 +2852,9 @@ void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
+  pushf(); // Preserve flags
  atomic_incl(counter_addr);
+  popf();
  bind(L);
 }

--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
@ -27,6 +27,7 @@

 #include "asm/assembler.hpp"
 #include "utilities/macros.hpp"
+#include "runtime/rtmLocking.hpp"


 // MacroAssembler extends Assembler by frequently used macros.
@ -111,7 +112,8 @@ class MacroAssembler: public Assembler {
        op == 0xE9 /* jmp */ ||
        op == 0xEB /* short jmp */ ||
        (op & 0xF0) == 0x70 /* short jcc */ ||
-        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */,
+        op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
+        op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
        "Invalid opcode at patch point");

    if (op == 0xEB || (op & 0xF0) == 0x70) {
@ -121,7 +123,7 @@ class MacroAssembler: public Assembler {
      guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset");
      *disp = imm8;
    } else {
-      int* disp = (int*) &branch[(op == 0x0F)? 2: 1];
+      int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
      int imm32 = target - (address) &disp[1];
      *disp = imm32;
    }
@ -161,7 +163,6 @@ class MacroAssembler: public Assembler {
  void incrementq(Register reg, int value = 1);
  void incrementq(Address dst, int value = 1);

-
  // Support optimal SSE move instructions.
  void movflt(XMMRegister dst, XMMRegister src) {
    if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
@ -187,6 +188,8 @@ class MacroAssembler: public Assembler {
  void incrementl(AddressLiteral dst);
  void incrementl(ArrayAddress dst);

+  void incrementq(AddressLiteral dst);
+
  // Alignment
  void align(int modulus);

@ -654,8 +657,36 @@ class MacroAssembler: public Assembler {
 #ifdef COMPILER2
  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
  // See full desription in macroAssembler_x86.cpp.
-  void fast_lock(Register obj, Register box, Register tmp, Register scr, BiasedLockingCounters* counters);
-  void fast_unlock(Register obj, Register box, Register tmp);
+  void fast_lock(Register obj, Register box, Register tmp,
+                 Register scr, Register cx1, Register cx2,
+                 BiasedLockingCounters* counters,
+                 RTMLockingCounters* rtm_counters,
+                 RTMLockingCounters* stack_rtm_counters,
+                 Metadata* method_data,
+                 bool use_rtm, bool profile_rtm);
+  void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm);
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel);
+  void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel);
+  void rtm_stack_locking(Register obj, Register tmp, Register scr,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(Register obj, Register box, Register tmp,
+                            Register scr, Register retry_on_busy_count,
+                            Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
 #endif

  Condition negate_condition(Condition cond);
@ -721,6 +752,7 @@ class MacroAssembler: public Assembler {


  void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
+  void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }


  void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
@ -762,7 +794,14 @@ class MacroAssembler: public Assembler {
  // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
  void cond_inc32(Condition cond, AddressLiteral counter_addr);
  // Unconditional atomic increment.
-  void atomic_incl(AddressLiteral counter_addr);
+  void atomic_incl(Address counter_addr);
+  void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
+#ifdef _LP64
+  void atomic_incq(Address counter_addr);
+  void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
+#endif
+  void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
+  void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }

  void lea(Register dst, AddressLiteral adr);
  void lea(Address dst, AddressLiteral adr);
@ -1074,7 +1113,11 @@ public:

  void movptr(Register dst, Address src);

-  void movptr(Register dst, AddressLiteral src);
+#ifdef _LP64
+  void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
+#else
+  void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
+#endif

  void movptr(Register dst, intptr_t src);
  void movptr(Register dst, Register src);
--- a/hotspot/src/cpu/x86/vm/rtmLocking.cpp
+++ b/hotspot/src/cpu/x86/vm/rtmLocking.cpp
@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/task.hpp"
+#include "runtime/rtmLocking.hpp"
+
+// Calculation flag: starts at 0 and is set to 1 by init() or by the one-shot task below
+uintx RTMLockingCounters::_calculation_flag = 0;
+
+class RTMLockingCalculationTask : public PeriodicTask { // one-shot PeriodicTask subclass for enabling RTM locking calculation
+ public:
+  RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){  } // interval_time is forwarded unchanged to PeriodicTask
+
+  virtual void task() {
+    RTMLockingCounters::_calculation_flag = 1; // set the flag, then self-destruct so the task does not run again
+    // Reclaim our storage and disenroll ourself
+    delete this;
+  }
+};
+
+void RTMLockingCounters::init() { // either schedule the delayed flag set or set the flag immediately
+  if (UseRTMLocking && RTMLockingCalculationDelay > 0) {
+    RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay); // task deletes itself in task()
+    task->enroll();
+  } else {
+    _calculation_flag = 1; // no delay requested (or RTM locking is off): set the flag right away
+  }
+}
+
+//------------------------------print_on-------------------------------
+void RTMLockingCounters::print_on(outputStream* st) { // print all counters to the caller-supplied stream st (previously ignored in favor of tty)
+  st->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate); // scaled by RTMTotalCountIncrRate, hence "estimated"
+  st->print_cr("# rtm lock aborts  : " UINTX_FORMAT, _abort_count);
+  for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
+    st->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]); // one line per abortX counter
+  }
+}
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
@ -1817,6 +1817,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  // Frame is now completed as far as size and linkage.
  int frame_complete = ((intptr_t)__ pc()) - start;

+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling JNI
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
  // Calculate the difference between rsp and rbp,. We need to know it
  // after the native call because on windows Java Natives will pop
  // the arguments and it is painful to do rsp relative addressing
@ -3170,6 +3177,12 @@ void SharedRuntime::generate_uncommon_trap_blob() {
  };

  address start = __ pc();
+
+  if (UseRTMLocking) {
+    // Abort RTM transaction before possible nmethod deoptimization.
+    __ xabort(0);
+  }
+
  // Push self-frame.
  __ subptr(rsp, return_off*wordSize);     // Epilog!

@ -3355,6 +3368,14 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
  address call_pc = NULL;
  bool cause_return = (poll_type == POLL_AT_RETURN);
  bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
+
+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling runtime
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
  // If cause_return is true we are at a poll_return and there is
  // the return address on the stack to the caller on the nmethod
  // that is safepoint. We can leave this return on the stack and
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@ -2012,6 +2012,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  // Frame is now completed as far as size and linkage.
  int frame_complete = ((intptr_t)__ pc()) - start;

+    if (UseRTMLocking) {
+      // Abort RTM transaction before calling JNI
+      // because critical section will be large and will be
+      // aborted anyway. Also nmethod could be deoptimized.
+      __ xabort(0);
+    }
+
 #ifdef ASSERT
    {
      Label L;
@ -3612,6 +3619,11 @@ void SharedRuntime::generate_uncommon_trap_blob() {

  address start = __ pc();

+  if (UseRTMLocking) {
+    // Abort RTM transaction before possible nmethod deoptimization.
+    __ xabort(0);
+  }
+
  // Push self-frame.  We get here with a return address on the
  // stack, so rsp is 8-byte aligned until we allocate our frame.
  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
@ -3792,6 +3804,13 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
  bool cause_return = (poll_type == POLL_AT_RETURN);
  bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);

+  if (UseRTMLocking) {
+    // Abort RTM transaction before calling runtime
+    // because critical section will be large and will be
+    // aborted anyway. Also nmethod could be deoptimized.
+    __ xabort(0);
+  }
+
  // Make room for return address (or push it again)
  if (!cause_return) {
    __ push(rbx);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
@ -50,8 +50,13 @@ int VM_Version::_cpuFeatures;
 const char*           VM_Version::_features_str = "";
 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };

+// Address of instruction which causes SEGV
+address VM_Version::_cpuinfo_segv_addr = 0;
+// Address of instruction after the one which causes SEGV
+address VM_Version::_cpuinfo_cont_addr = 0;
+
 static BufferBlob* stub_blob;
-static const int stub_size = 550;
+static const int stub_size = 600;

 extern "C" {
  typedef void (*getPsrInfo_stub_t)(void*);
@ -234,9 +239,9 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
-    __ andl(rcx, 0x18000000);
+    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
-    __ jccb(Assembler::notEqual, sef_cpuid);
+    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
@ -247,6 +252,47 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

+    __ andl(rax, 0x6); // xcr0 bits sse | ymm
+    __ cmpl(rax, 0x6);
+    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
+
+    //
+    // Some OSs have a bug when upper 128bits of YMM
+    // registers are not restored after a signal processing.
+    // Generate SEGV here (reference through NULL)
+    // and check upper YMM bits after it.
+    //
+    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
+
+    // load value into all 32 bytes of ymm7 register
+    __ movl(rcx, VM_Version::ymm_test_value());
+
+    __ movdl(xmm0, rcx);
+    __ pshufd(xmm0, xmm0, 0x00);
+    __ vinsertf128h(xmm0, xmm0, xmm0);
+    __ vmovdqu(xmm7, xmm0);
+#ifdef _LP64
+    __ vmovdqu(xmm8,  xmm0);
+    __ vmovdqu(xmm15, xmm0);
+#endif
+
+    __ xorl(rsi, rsi);
+    VM_Version::set_cpuinfo_segv_addr( __ pc() );
+    // Generate SEGV
+    __ movl(rax, Address(rsi, 0));
+
+    VM_Version::set_cpuinfo_cont_addr( __ pc() );
+    // Returns here after signal. Save xmm0 to check it later.
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
+    __ vmovdqu(Address(rsi,  0), xmm0);
+    __ vmovdqu(Address(rsi, 32), xmm7);
+#ifdef _LP64
+    __ vmovdqu(Address(rsi, 64), xmm8);
+    __ vmovdqu(Address(rsi, 96), xmm15);
+#endif
+
+    VM_Version::clean_cpuFeatures();
+
    //
    // cpuid(0x7) Structured Extended Features
    //
@ -429,7 +475,7 @@ void VM_Version::get_processor_features() {
  }

  char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
@ -446,8 +492,9 @@ void VM_Version::get_processor_features() {
               (supports_avx()    ? ", avx" : ""),
               (supports_avx2()   ? ", avx2" : ""),
               (supports_aes()    ? ", aes" : ""),
-               (supports_clmul()    ? ", clmul" : ""),
+               (supports_clmul()  ? ", clmul" : ""),
               (supports_erms()   ? ", erms" : ""),
+               (supports_rtm()    ? ", rtm" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt()   ? ", lzcnt": ""),
@ -488,7 +535,7 @@ void VM_Version::get_processor_features() {
    }
  } else if (UseAES) {
    if (!FLAG_IS_DEFAULT(UseAES))
-      warning("AES instructions not available on this CPU");
+      warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }

@ -521,10 +568,57 @@ void VM_Version::get_processor_features() {
    }
  } else if (UseAESIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics not available on this CPU");
+      warning("AES intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
  }

+  // Adjust RTM (Restricted Transactional Memory) flags
+  if (!supports_rtm() && UseRTMLocking) {
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    // VM_Version_init() is executed after UseBiasedLocking is used
+    // in Thread::allocate().
+    vm_exit_during_initialization("RTM instructions are not available on this CPU");
+  }
+
+#if INCLUDE_RTM_OPT
+  if (UseRTMLocking) {
+    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+      // RTM locking should be used only for applications with
+      // high lock contention. For now we do not use it by default.
+      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+    }
+    if (!is_power_of_2(RTMTotalCountIncrRate)) {
+      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+    }
+    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+    }
+  } else { // !UseRTMLocking
+    if (UseRTMForStackLocks) {
+      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+      }
+      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+    }
+    if (UseRTMDeopt) {
+      FLAG_SET_DEFAULT(UseRTMDeopt, false);
+    }
+    if (PrintPreciseRTMLockingStatistics) {
+      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+    }
+  }
+#else
+  if (UseRTMLocking) {
+    // Only C2 does RTM locking optimization.
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+  }
+#endif
+
 #ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
@ -540,14 +634,28 @@ void VM_Version::get_processor_features() {
    if (MaxVectorSize > 32) {
      FLAG_SET_DEFAULT(MaxVectorSize, 32);
    }
-    if (MaxVectorSize > 16 && UseAVX == 0) {
-      // Only supported with AVX+
+    if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
+      // 32 bytes vectors (in YMM) are only supported with AVX+
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
    if (UseSSE < 2) {
-      // Only supported with SSE2+
+      // Vectors (in XMM) are only supported with SSE2+
      FLAG_SET_DEFAULT(MaxVectorSize, 0);
    }
+#ifdef ASSERT
+    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
+      tty->print_cr("State of YMM registers after signal handle:");
+      int nreg = 2 LP64_ONLY(+2);
+      const char* ymm_name[4] = {"0", "7", "8", "15"};
+      for (int i = 0; i < nreg; i++) {
+        tty->print("YMM%s:", ymm_name[i]);
+        for (int j = 7; j >=0; j--) {
+          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
+        }
+        tty->cr();
+      }
+    }
+#endif
  }
 #endif

@ -678,14 +786,6 @@ void VM_Version::get_processor_features() {
      }
    }
  }
-#if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on BSD until it fixes
-      // restoring upper 128bit of YMM registers on return
-      // from signal handler.
-      FLAG_SET_DEFAULT(MaxVectorSize, 16);
-    }
-#endif // COMPILER2

  // Use count leading zeros count instruction if available.
  if (supports_lzcnt()) {
@ -814,6 +914,11 @@ void VM_Version::get_processor_features() {
    if (UseAES) {
      tty->print("  UseAES=1");
    }
+#ifdef COMPILER2
+    if (MaxVectorSize > 0) {
+      tty->print("  MaxVectorSize=%d", MaxVectorSize);
+    }
+#endif
    tty->cr();
    tty->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
@ -856,6 +961,27 @@ void VM_Version::get_processor_features() {
 #endif // !PRODUCT
 }

+// Decide whether biased locking is used (overrides the Abstract_VM_Version
+// implementation). When RTM support is compiled in and both UseRTMLocking
+// and UseBiasedLocking are on, UseBiasedLocking is switched off first; a
+// warning is issued if the flag was not at its default.
+// Returns the resulting value of UseBiasedLocking.
+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking pays off when lock contention is high and data contention
+  // is low. Under high lock contention the lock is usually inflated, a
+  // case biased locking is not suited for, and the RTM locking code
+  // requires biased locking to be off.
+  // Note: UseBiasedLocking can't be switched off in
+  // get_processor_features() because it is used by Thread::allocate(),
+  // which is called before VM_Version::initialize().
+  const bool conflict = UseRTMLocking && UseBiasedLocking;
+  if (conflict) {
+    if (!FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag.");
+      UseBiasedLocking = false;
+    } else {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);
+    }
+  }
+#endif
+  return UseBiasedLocking;
+}
+
 void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp
@ -207,7 +207,9 @@ public:
                        : 2,
                   bmi2 : 1,
                   erms : 1,
-                        : 22;
+                        : 1,
+                   rtm  : 1,
+                        : 20;
    } bits;
  };

@ -229,6 +231,9 @@ protected:
                               // 0 if this instruction is not available
  static const char* _features_str;

+  static address   _cpuinfo_segv_addr; // address of instruction which causes SEGV
+  static address   _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
+
  enum {
    CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
    CPU_CMOV   = (1 << 1),
@ -254,7 +259,8 @@ protected:
    CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
    CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
    CPU_BMI1   = (1 << 22),
-    CPU_BMI2   = (1 << 23)
+    CPU_BMI2   = (1 << 23),
+    CPU_RTM    = (1 << 24)  // Restricted Transactional Memory instructions
  } cpuFeatureFlags;

  enum {
@ -361,6 +367,9 @@ protected:
    // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
    XemXcr0Eax   xem_xcr0_eax;
    uint32_t     xem_xcr0_edx; // reserved
+
+    // Space to save ymm registers after signal handle
+    int          ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
  };

  // The actual cpuid info block
@ -438,6 +447,8 @@ protected:
      result |= CPU_ERMS;
    if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
      result |= CPU_CLMUL;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
+      result |= CPU_RTM;

    // AMD features.
    if (is_amd()) {
@ -460,6 +471,21 @@ protected:
    return result;
  }

+  // Returns true only when AVX is supported and every word saved in
+  // _cpuid_info.ymm_save still equals ymm_test_value(), i.e. the OS
+  // saved/restored all bits of the AVX registers during signal processing.
+  static bool os_supports_avx_vectors() {
+    if (supports_avx()) {
+      // 2 saved registers, plus 2 more on LP64; 8 ints (32 bytes) each.
+      const int saved_words = 8 * (2 LP64_ONLY(+2));
+      const int expected = ymm_test_value();
+      for (int w = 0; w < saved_words; w++) {
+        if (_cpuid_info.ymm_save[w] != expected) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+
  static void get_processor_features();

 public:
@ -476,10 +502,26 @@ public:
  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
  static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
+  static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
+
+  // Test pattern loaded into the ymm registers before the deliberate SEGV and compared with ymm_save afterwards
+  static int ymm_test_value()    { return 0xCAFEBABE; }
+
+  static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; } // record address of the instruction which causes SEGV
+  static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; } // true if pc is that recorded address
+  static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; } // record address of the instruction after the faulting one
+  static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; } // address recorded by set_cpuinfo_cont_addr()
+
+  static void clean_cpuFeatures()   { _cpuFeatures = 0; } // clear every feature bit
+  static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); } // replaces (not ORs) the feature bits with SSE, SSE2 and AVX
+

  // Initialization
  static void initialize();

+  // Override Abstract_VM_Version implementation
+  static bool use_biased_locking();
+
  // Asserts
  static void assert_is_initialized() {
    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
@ -572,6 +614,7 @@ public:
  static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
  static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
  static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }
+  static bool supports_rtm()      { return (_cpuFeatures & CPU_RTM) != 0; }
  static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
  static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
  // Intel features
--- a/hotspot/src/cpu/x86/vm/x86.ad
+++ b/hotspot/src/cpu/x86/vm/x86.ad
@ -474,7 +474,125 @@ reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM

 %}

+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ }% and source_hpp %{ }% freely as needed.
+
+// Call trampoline stub sizing.
+// Used for optimization in Compile::shorten_branches.
+class CallStubImpl {
+ public:
+  // Size of call trampoline stub: 0, because there are no call
+  // trampolines on this platform.
+  static uint size_call_trampoline() { return 0; }
+
+  // Number of relocations needed by a call trampoline stub: 0, for the
+  // same reason.
+  static uint reloc_call_trampoline() { return 0; }
+};
+
+// Emitters and size bounds for the exception handler and deopt handler
+// stubs.
+class HandlerImpl {
+ public:
+  // Both return the offset of the emitted handler, or 0 when the stub
+  // could not be started.
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  // Upper bound, in bytes, for the exception handler stub.
+  static uint size_exception_handler() {
+    // The exception handler starts out as a jump and can be patched to a
+    // call by deoptimization (4932387); NativeCall instruction size is
+    // the same as NativeJump.
+    // Note that this value is also credited (in output.cpp) to
+    // the size of the code section.
+    return NativeJump::instruction_size;
+  }
+
+  // Upper bound, in bytes, for the deopt handler stub.
+  static uint size_deopt_handler() {
+#ifdef _LP64
+    // three 5 byte instructions
+    return 15;
+#else
+    // pushl(); jmp; -- NativeCall instruction size is the same as
+    // NativeJump.
+    // Note that this value is also credited (in output.cpp) to
+    // the size of the code section.
+    return 5 + NativeJump::instruction_size;
+#endif
+  }
+};
+
+%} // end source_hpp
+
 source %{
+
+// Emit exception handler code: a single jump to the entry point of
+// OptoRuntime::exception_blob(), placed in a stub.
+// Returns the offset of the handler, or 0 if the stub could not be started.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+  // The code buffer's insts_mark is always relative to insts, which is
+  // why the macroassembler must be used to generate a handler.
+  MacroAssembler _masm(&cbuf);
+
+  address stub_start = __ start_a_stub(size_exception_handler());
+  if (stub_start == NULL) {
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  const int handler_offset = __ offset();
+  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  // The emitted code must fit in the space reserved for the stub.
+  assert(__ offset() - handler_offset <= (int) size_exception_handler(), "overflow");
+  __ end_a_stub();
+
+  return handler_offset;
+}
+
+// Emit deopt handler code: push the handler's own address, then jump to
+// the unpack entry of SharedRuntime::deopt_blob().
+// Returns the offset of the handler, or 0 if the stub could not be started.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  // The code buffer's insts_mark is always relative to insts, which is
+  // why the macroassembler must be used to generate a handler.
+  MacroAssembler _masm(&cbuf);
+
+  address stub_start = __ start_a_stub(size_deopt_handler());
+  if (stub_start == NULL) {
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  const int handler_offset = __ offset();
+
+#ifdef _LP64
+  // NOTE(review): the_pc is not read anywhere below.
+  address the_pc = (address) __ pc();
+  Label next;
+  // Push "the_pc" on the stack without destroying any registers,
+  // as they all may be live.
+
+  // The call pushes the address of "next" ...
+  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
+  __ bind(next);
+  // ... which is then adjusted so that it matches "the_pc".
+  __ subptr(Address(rsp, 0), __ offset() - handler_offset);
+#else
+  InternalAddress here(__ pc());
+  __ pushptr(here.addr());
+#endif
+
+  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  // The emitted code must fit in the space reserved for the stub.
+  assert(__ offset() - handler_offset <= (int) size_deopt_handler(), "overflow");
+  __ end_a_stub();
+
+  return handler_offset;
+}
+
+
+//=============================================================================
+
  // Float masks come from different places depending on platform.
 #ifdef _LP64
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
--- a/hotspot/src/cpu/x86/vm/x86_32.ad
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad
@ -1297,59 +1297,6 @@ uint MachUEPNode::size(PhaseRegAlloc *ra_) const {


 //=============================================================================
-uint size_exception_handler() {
-  // NativeCall instruction size is the same as NativeJump.
-  // exception handler starts out as jump and can be patched to
-  // a call be deoptimization.  (4932387)
-  // Note that this value is also credited (in output.cpp) to
-  // the size of the code section.
-  return NativeJump::instruction_size;
-}
-
-// Emit exception handler code.  Stuff framesize into a register
-// and call a VM stub routine.
-int emit_exception_handler(CodeBuffer& cbuf) {
-
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
-  address base =
-  __ start_a_stub(size_exception_handler());
-  if (base == NULL)  return 0;  // CodeBuffer::expand failed
-  int offset = __ offset();
-  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
-  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
-  __ end_a_stub();
-  return offset;
-}
-
-uint size_deopt_handler() {
-  // NativeCall instruction size is the same as NativeJump.
-  // exception handler starts out as jump and can be patched to
-  // a call be deoptimization.  (4932387)
-  // Note that this value is also credited (in output.cpp) to
-  // the size of the code section.
-  return 5 + NativeJump::instruction_size; // pushl(); jmp;
-}
-
-// Emit deopt handler code.
-int emit_deopt_handler(CodeBuffer& cbuf) {
-
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
-  address base =
-  __ start_a_stub(size_exception_handler());
-  if (base == NULL)  return 0;  // CodeBuffer::expand failed
-  int offset = __ offset();
-  InternalAddress here(__ pc());
-  __ pushptr(here.addr());
-
-  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
-  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
-  __ end_a_stub();
-  return offset;
-}

 int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
@ -12925,13 +12872,31 @@ instruct RethrowException()

 // inlined locking and unlocking

+instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
+  predicate(Compile::current()->use_rtm()); // RTM variant of cmpFastLock: selected only when the current compilation uses RTM
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); // four temporaries; box is both used and killed
+  ins_cost(300);
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, $cx1$$Register, $cx2$$Register,
+                 _counters, _rtm_counters, _stack_rtm_counters,
+                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), // method data of the method being compiled
+                 true, ra_->C->profile_rtm()); // NOTE(review): presumably the use_rtm / profile_rtm arguments; the non-RTM cmpFastLock passes false, false -- confirm against MacroAssembler::fast_lock
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
-    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
 %}
@ -12942,7 +12907,7 @@ instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
-    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
 %}
--- a/hotspot/src/cpu/x86/vm/x86_64.ad
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad
@ -1439,66 +1439,9 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
 }
-
+ 

 //=============================================================================
-uint size_exception_handler()
-{
-  // NativeCall instruction size is the same as NativeJump.
-  // Note that this value is also credited (in output.cpp) to
-  // the size of the code section.
-  return NativeJump::instruction_size;
-}
-
-// Emit exception handler code.
-int emit_exception_handler(CodeBuffer& cbuf)
-{
-
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
-  address base =
-  __ start_a_stub(size_exception_handler());
-  if (base == NULL)  return 0;  // CodeBuffer::expand failed
-  int offset = __ offset();
-  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
-  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
-  __ end_a_stub();
-  return offset;
-}
-
-uint size_deopt_handler()
-{
-  // three 5 byte instructions
-  return 15;
-}
-
-// Emit deopt handler code.
-int emit_deopt_handler(CodeBuffer& cbuf)
-{
-
-  // Note that the code buffer's insts_mark is always relative to insts.
-  // That's why we must use the macroassembler to generate a handler.
-  MacroAssembler _masm(&cbuf);
-  address base =
-  __ start_a_stub(size_deopt_handler());
-  if (base == NULL)  return 0;  // CodeBuffer::expand failed
-  int offset = __ offset();
-  address the_pc = (address) __ pc();
-  Label next;
-  // push a "the_pc" on the stack without destroying any registers
-  // as they all may be live.
-
-  // push address of "next"
-  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
-  __ bind(next);
-  // adjust it so it matches "the_pc"
-  __ subptr(Address(rsp, 0), __ offset() - offset);
-  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
-  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
-  __ end_a_stub();
-  return offset;
-}

 int Matcher::regnum_to_fpu_offset(int regnum)
 {
@ -11387,13 +11330,31 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
 // ============================================================================
 // inlined locking and unlocking

+instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
+  predicate(Compile::current()->use_rtm()); // RTM variant of cmpFastLock: selected only when the current compilation uses RTM
+  match(Set cr (FastLock object box));
+  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); // four temporaries; box is both used and killed
+  ins_cost(300);
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
+  ins_encode %{
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, $cx1$$Register, $cx2$$Register,
+                 _counters, _rtm_counters, _stack_rtm_counters,
+                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), // method data of the method being compiled
+                 true, ra_->C->profile_rtm()); // NOTE(review): presumably the use_rtm / profile_rtm arguments; the non-RTM cmpFastLock passes false, false -- confirm against MacroAssembler::fast_lock
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
+  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
-    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
+                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
 %}
@ -11404,7 +11365,7 @@ instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
  ins_cost(300);
  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
-    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
 %}
--- a/hotspot/src/os/aix/vm/mutex_aix.inline.hpp
+++ b/hotspot/src/os/aix/vm/mutex_aix.inline.hpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -28,6 +28,6 @@

 #include "os_aix.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "thread_aix.inline.hpp"
+#include "runtime/thread.inline.hpp"

 #endif // OS_AIX_VM_MUTEX_AIX_INLINE_HPP
--- a/hotspot/src/os/aix/vm/os_aix.cpp
+++ b/hotspot/src/os/aix/vm/os_aix.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -61,10 +61,10 @@
 #include "runtime/statSampler.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/threadCritical.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
 #include "services/attachListener.hpp"
 #include "services/runtimeService.hpp"
-#include "thread_aix.inline.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
@ -3593,6 +3593,11 @@ void os::Aix::check_signal_handler(int sig) {
    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
    // No need to check this sig any longer
    sigaddset(&check_signal_done, sig);
+    // Running under non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned SIG_IGN
+    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
+      tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell",
+                    exception_name(sig, buf, O_BUFLEN));
+    }
  } else if (os::Aix::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Aix::get_our_sigflags(sig)) {
    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
    tty->print("expected:" PTR32_FORMAT, os::Aix::get_our_sigflags(sig));
--- a/hotspot/src/os/aix/vm/threadCritical_aix.cpp
+++ b/hotspot/src/os/aix/vm/threadCritical_aix.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -25,7 +25,7 @@

 #include "precompiled.hpp"
 #include "runtime/threadCritical.hpp"
-#include "thread_aix.inline.hpp"
+#include "runtime/thread.inline.hpp"

 // put OS-includes here
 # include <pthread.h>
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp
@ -3374,6 +3374,11 @@ void os::Bsd::check_signal_handler(int sig) {
    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
    // No need to check this sig any longer
    sigaddset(&check_signal_done, sig);
+    // When running under a non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned to SIG_IGN
+    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
+      tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell",
+                    exception_name(sig, buf, O_BUFLEN));
+    }
  } else if(os::Bsd::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Bsd::get_our_sigflags(sig)) {
    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
    tty->print("expected:" PTR32_FORMAT, os::Bsd::get_our_sigflags(sig));
--- a/hotspot/src/os/linux/vm/os_linux.cpp
+++ b/hotspot/src/os/linux/vm/os_linux.cpp
@ -109,6 +109,8 @@

 #define MAX_PATH    (2 * K)

+#define MAX_SECS 100000000
+
 // for timer info max values which include all bits
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

@ -2434,7 +2436,6 @@ class Semaphore : public StackObj {
    sem_t _semaphore;
 };

-
 Semaphore::Semaphore() {
  sem_init(&_semaphore, 0, 0);
 }
@ -2456,8 +2457,22 @@ bool Semaphore::trywait() {
 }

 bool Semaphore::timedwait(unsigned int sec, int nsec) {
+
  struct timespec ts;
-  unpackTime(&ts, false, (sec * NANOSECS_PER_SEC) + nsec);
+  // Semaphores are always associated with CLOCK_REALTIME
+  os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
+  // see unpackTime for discussion on overflow checking
+  if (sec >= MAX_SECS) {
+    ts.tv_sec += MAX_SECS;
+    ts.tv_nsec = 0;
+  } else {
+    ts.tv_sec += sec;
+    ts.tv_nsec += nsec;
+    if (ts.tv_nsec >= NANOSECS_PER_SEC) {
+      ts.tv_nsec -= NANOSECS_PER_SEC;
+      ++ts.tv_sec; // note: this must be <= max_secs
+    }
+  }

  while (1) {
    int result = sem_timedwait(&_semaphore, &ts);
@ -4560,6 +4575,11 @@ void os::Linux::check_signal_handler(int sig) {
    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
    // No need to check this sig any longer
    sigaddset(&check_signal_done, sig);
+    // When running under a non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned to SIG_IGN
+    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
+      tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell",
+                    exception_name(sig, buf, O_BUFLEN));
+    }
  } else if(os::Linux::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Linux::get_our_sigflags(sig)) {
    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
    tty->print("expected:" PTR32_FORMAT, os::Linux::get_our_sigflags(sig));
@ -5656,7 +5676,6 @@ void os::PlatformEvent::unpark() {
 * is no need to track notifications.
 */

-#define MAX_SECS 100000000
 /*
 * This code is common to linux and solaris and will be moved to a
 * common place in dolphin.
--- a/hotspot/src/os/solaris/vm/osThread_solaris.cpp
+++ b/hotspot/src/os/solaris/vm/osThread_solaris.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -41,7 +41,6 @@ void OSThread::pd_initialize() {
  _thread_id                         = 0;
  sigemptyset(&_caller_sigmask);

-  _saved_interrupt_thread_state      = _thread_new;
  _vm_created_thread                 = false;
 }

--- a/hotspot/src/os/solaris/vm/osThread_solaris.hpp
+++ b/hotspot/src/os/solaris/vm/osThread_solaris.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -82,20 +82,6 @@
  void set_ucontext(ucontext_t* ptr) { _ucontext = ptr; }
  static void SR_handler(Thread* thread, ucontext_t* uc);

- // ***************************************************************
- // java.lang.Thread.interrupt state.
- // ***************************************************************
-
- private:
-
-  JavaThreadState      _saved_interrupt_thread_state;       // the thread state before a system call -- restored afterward
-
- public:
-
-
-  JavaThreadState   saved_interrupt_thread_state()                              { return _saved_interrupt_thread_state; }
-  void              set_saved_interrupt_thread_state(JavaThreadState state)     { _saved_interrupt_thread_state = state; }
-
  static void       handle_spinlock_contention(int tries);                      // Used for thread local eden locking

  // ***************************************************************
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp
@ -311,33 +311,6 @@ struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
  return localtime_r(clock, res);
 }

-// interruptible infrastructure
-
-// setup_interruptible saves the thread state before going into an
-// interruptible system call.
-// The saved state is used to restore the thread to
-// its former state whether or not an interrupt is received.
-// Used by classloader os::read
-// os::restartable_read calls skip this layer and stay in _thread_in_native
-
-void os::Solaris::setup_interruptible(JavaThread* thread) {
-
-  JavaThreadState thread_state = thread->thread_state();
-
-  assert(thread_state != _thread_blocked, "Coming from the wrong thread");
-  assert(thread_state != _thread_in_native, "Native threads skip setup_interruptible");
-  OSThread* osthread = thread->osthread();
-  osthread->set_saved_interrupt_thread_state(thread_state);
-  thread->frame_anchor()->make_walkable(thread);
-  ThreadStateTransition::transition(thread, thread_state, _thread_blocked);
-}
-
-JavaThread* os::Solaris::setup_interruptible() {
-  JavaThread* thread = (JavaThread*)ThreadLocalStorage::thread();
-  setup_interruptible(thread);
-  return thread;
-}
-
 void os::Solaris::try_enable_extended_io() {
  typedef int (*enable_extended_FILE_stdio_t)(int, int);

@ -353,41 +326,6 @@ void os::Solaris::try_enable_extended_io() {
  }
 }

-
-#ifdef ASSERT
-
-JavaThread* os::Solaris::setup_interruptible_native() {
-  JavaThread* thread = (JavaThread*)ThreadLocalStorage::thread();
-  JavaThreadState thread_state = thread->thread_state();
-  assert(thread_state == _thread_in_native, "Assumed thread_in_native");
-  return thread;
-}
-
-void os::Solaris::cleanup_interruptible_native(JavaThread* thread) {
-  JavaThreadState thread_state = thread->thread_state();
-  assert(thread_state == _thread_in_native, "Assumed thread_in_native");
-}
-#endif
-
-// cleanup_interruptible reverses the effects of setup_interruptible
-// setup_interruptible_already_blocked() does not need any cleanup.
-
-void os::Solaris::cleanup_interruptible(JavaThread* thread) {
-  OSThread* osthread = thread->osthread();
-
-  ThreadStateTransition::transition(thread, _thread_blocked, osthread->saved_interrupt_thread_state());
-}
-
-// I/O interruption related counters called in _INTERRUPTIBLE
-
-void os::Solaris::bump_interrupted_before_count() {
-  RuntimeService::record_interrupted_before_count();
-}
-
-void os::Solaris::bump_interrupted_during_count() {
-  RuntimeService::record_interrupted_during_count();
-}
-
 static int _processors_online = 0;

         jint os::Solaris::_os_thread_limit = 0;
@ -3366,11 +3304,20 @@ bool os::can_execute_large_page_memory() {

 // Read calls from inside the vm need to perform state transitions
 size_t os::read(int fd, void *buf, unsigned int nBytes) {
-  INTERRUPTIBLE_RETURN_INT_VM(::read(fd, buf, nBytes), os::Solaris::clear_interrupted);
+  size_t res;
+  JavaThread* thread = (JavaThread*)Thread::current();
+  assert(thread->thread_state() == _thread_in_vm, "Assumed _thread_in_vm");
+  ThreadBlockInVM tbiv(thread);
+  RESTARTABLE(::read(fd, buf, (size_t) nBytes), res);
+  return res;
 }

 size_t os::restartable_read(int fd, void *buf, unsigned int nBytes) {
-  INTERRUPTIBLE_RETURN_INT(::read(fd, buf, nBytes), os::Solaris::clear_interrupted);
+  size_t res;
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE(::read(fd, buf, (size_t) nBytes), res);
+  return res;
 }

 void os::naked_short_sleep(jlong ms) {
@ -4471,6 +4418,11 @@ void os::Solaris::check_signal_handler(int sig) {
    tty->print_cr("  found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN));
    // No need to check this sig any longer
    sigaddset(&check_signal_done, sig);
+    // When running under a non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned to SIG_IGN
+    if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) {
+      tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell",
+                    exception_name(sig, buf, O_BUFLEN));
+    }
  } else if(os::Solaris::get_our_sigflags(sig) != 0 && act.sa_flags != os::Solaris::get_our_sigflags(sig)) {
    tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN));
    tty->print("expected:" PTR32_FORMAT, os::Solaris::get_our_sigflags(sig));
@ -5305,6 +5257,8 @@ int os::fsync(int fd)  {
 }

 int os::available(int fd, jlong *bytes) {
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
  jlong cur, end;
  int mode;
  struct stat64 buf64;
@ -5312,14 +5266,9 @@ int os::available(int fd, jlong *bytes) {
  if (::fstat64(fd, &buf64) >= 0) {
    mode = buf64.st_mode;
    if (S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
-      /*
-      * XXX: is the following call interruptible? If so, this might
-      * need to go through the INTERRUPT_IO() wrapper as for other
-      * blocking, interruptible calls in this file.
-      */
      int n,ioctl_return;

-      INTERRUPTIBLE(::ioctl(fd, FIONREAD, &n),ioctl_return,os::Solaris::clear_interrupted);
+      RESTARTABLE(::ioctl(fd, FIONREAD, &n), ioctl_return);
      if (ioctl_return>= 0) {
          *bytes = n;
        return 1;
@ -6250,7 +6199,11 @@ bool os::is_headless_jre() {
 }

 size_t os::write(int fd, const void *buf, unsigned int nBytes) {
-  INTERRUPTIBLE_RETURN_INT(::write(fd, buf, nBytes), os::Solaris::clear_interrupted);
+  size_t res;
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE((size_t) ::write(fd, buf, (size_t) nBytes), res);
+  return res;
 }

 int os::close(int fd) {
@ -6262,11 +6215,15 @@ int os::socket_close(int fd) {
 }

 int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
-  INTERRUPTIBLE_RETURN_INT((int)::recv(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE_RETURN_INT((int)::recv(fd, buf, nBytes, flags));
 }

 int os::send(int fd, char* buf, size_t nBytes, uint flags) {
-  INTERRUPTIBLE_RETURN_INT((int)::send(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE_RETURN_INT((int)::send(fd, buf, nBytes, flags));
 }

 int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
@ -6287,11 +6244,14 @@ int os::timeout(int fd, long timeout) {
  pfd.fd = fd;
  pfd.events = POLLIN;

+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+
  gettimeofday(&t, &aNull);
  prevtime = ((julong)t.tv_sec * 1000)  +  t.tv_usec / 1000;

  for(;;) {
-    INTERRUPTIBLE_NORESTART(::poll(&pfd, 1, timeout), res, os::Solaris::clear_interrupted);
+    res = ::poll(&pfd, 1, timeout);
    if(res == OS_ERR && errno == EINTR) {
        if(timeout != -1) {
          gettimeofday(&t, &aNull);
@ -6307,17 +6267,30 @@ int os::timeout(int fd, long timeout) {

 int os::connect(int fd, struct sockaddr *him, socklen_t len) {
  int _result;
-  INTERRUPTIBLE_NORESTART(::connect(fd, him, len), _result,\
-                          os::Solaris::clear_interrupted);
+  _result = ::connect(fd, him, len);

-  // Depending on when thread interruption is reset, _result could be
-  // one of two values when errno == EINTR
-
-  if (((_result == OS_INTRPT) || (_result == OS_ERR))
-      && (errno == EINTR)) {
+  // On Solaris, when a connect() call is interrupted, the connection
+  // can be established asynchronously (see 6343810). Subsequent calls
+  // to connect() must check the errno value which has the semantic
+  // described below (copied from the connect() man page). Handling
+  // of asynchronously established connections is required for both
+  // blocking and non-blocking sockets.
+  //     EINTR            The  connection  attempt  was   interrupted
+  //                      before  any data arrived by the delivery of
+  //                      a signal. The connection, however, will  be
+  //                      established asynchronously.
+  //
+  //     EINPROGRESS      The socket is non-blocking, and the connec-
+  //                      tion  cannot  be completed immediately.
+  //
+  //     EALREADY         The socket is non-blocking,  and a previous
+  //                      connection  attempt  has  not yet been com-
+  //                      pleted.
+  //
+  //     EISCONN          The socket is already connected.
+  if (_result == OS_ERR && errno == EINTR) {
     /* restarting a connect() changes its errno semantics */
-     INTERRUPTIBLE(::connect(fd, him, len), _result,\
-                   os::Solaris::clear_interrupted);
+     RESTARTABLE(::connect(fd, him, len), _result);
     /* undo these changes */
     if (_result == OS_ERR) {
       if (errno == EALREADY) {
@ -6335,20 +6308,23 @@ int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
  if (fd < 0) {
    return OS_ERR;
  }
-  INTERRUPTIBLE_RETURN_INT((int)::accept(fd, him, len),\
-                           os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE_RETURN_INT((int)::accept(fd, him, len));
 }

 int os::recvfrom(int fd, char* buf, size_t nBytes, uint flags,
                 sockaddr* from, socklen_t* fromlen) {
-  INTERRUPTIBLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen),\
-                           os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen));
 }

 int os::sendto(int fd, char* buf, size_t len, uint flags,
               struct sockaddr* to, socklen_t tolen) {
-  INTERRUPTIBLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen),\
-                           os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+  RESTARTABLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen));
 }

 int os::socket_available(int fd, jint *pbytes) {
@ -6363,8 +6339,9 @@ int os::socket_available(int fd, jint *pbytes) {
 }

 int os::bind(int fd, struct sockaddr* him, socklen_t len) {
-   INTERRUPTIBLE_RETURN_INT_NORESTART(::bind(fd, him, len),\
-                                      os::Solaris::clear_interrupted);
+  assert(((JavaThread*)Thread::current())->thread_state() == _thread_in_native,
+          "Assumed _thread_in_native");
+   return ::bind(fd, him, len);
 }

 // Get the default path to the core file
--- a/hotspot/src/os/solaris/vm/os_solaris.hpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -311,24 +311,6 @@ class Solaris {
                                       outdata, validity) : -1;
  }

-  enum {
-    clear_interrupted = true
-  };
-  static void setup_interruptible(JavaThread* thread);
-  static void setup_interruptible_already_blocked(JavaThread* thread);
-  static JavaThread* setup_interruptible();
-  static void cleanup_interruptible(JavaThread* thread);
-
-  // perf counter incrementers used by _INTERRUPTIBLE
-
-  static void bump_interrupted_before_count();
-  static void bump_interrupted_during_count();
-
-#ifdef ASSERT
-  static JavaThread* setup_interruptible_native();
-  static void cleanup_interruptible_native(JavaThread* thread);
-#endif
-
  static sigset_t* unblocked_signals();
  static sigset_t* vm_signals();
  static sigset_t* allowdebug_blocked_signals();
--- a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp
@ -111,104 +111,7 @@ inline int os::closedir(DIR *dirp) {
 //////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////

-// macros for interruptible io and system calls and system call restarting
-
-#define _INTERRUPTIBLE(_setup, _cmd, _result, _thread, _clear, _before, _after, _int_enable) \
-do { \
-  _setup; \
-  _before; \
-  OSThread* _osthread = _thread->osthread(); \
-  if (_int_enable && _thread->has_last_Java_frame()) { \
-    /* this is java interruptible io stuff */ \
-    if (os::is_interrupted(_thread, _clear))  { \
-      os::Solaris::bump_interrupted_before_count(); \
-      _result = OS_INTRPT; \
-    } else { \
-      /* _cmd always expands to an assignment to _result */ \
-      if ((_cmd) < 0 && errno == EINTR  \
-       && os::is_interrupted(_thread, _clear)) { \
-        os::Solaris::bump_interrupted_during_count(); \
-        _result = OS_INTRPT; \
-      } \
-    } \
-  } else { \
-    /* this is normal blocking io stuff */ \
-    _cmd; \
-  } \
-  _after; \
-} while(false)
-
-// Interruptible io support + restarting of interrupted system calls
-
-#ifndef ASSERT
-
-#define INTERRUPTIBLE(_cmd, _result, _clear) do { \
-  _INTERRUPTIBLE( JavaThread* _thread = (JavaThread*)ThreadLocalStorage::thread(),_result = _cmd, _result, _thread, _clear, , , UseVMInterruptibleIO); \
-} while((_result == OS_ERR) && (errno == EINTR))
-
-#else
-
-// This adds an assertion that it is only called from thread_in_native
-// The call overhead is skipped for performance in product mode
-#define INTERRUPTIBLE(_cmd, _result, _clear) do { \
-  _INTERRUPTIBLE(JavaThread* _thread = os::Solaris::setup_interruptible_native(), _result = _cmd, _result, _thread, _clear, , os::Solaris::cleanup_interruptible_native(_thread), UseVMInterruptibleIO ); \
-} while((_result == OS_ERR) && (errno == EINTR))
-
-#endif
-
-// Used for calls from _thread_in_vm, not from _thread_in_native
-#define INTERRUPTIBLE_VM(_cmd, _result, _clear) do { \
-  _INTERRUPTIBLE(JavaThread* _thread = os::Solaris::setup_interruptible(), _result = _cmd, _result, _thread, _clear, , os::Solaris::cleanup_interruptible(_thread), UseVMInterruptibleIO ); \
-} while((_result == OS_ERR) && (errno == EINTR))
-
-/* Use NORESTART when the system call cannot return EINTR, when something other
-   than a system call is being invoked, or when the caller must do EINTR
-   handling. */
-
-#ifndef ASSERT
-
-#define INTERRUPTIBLE_NORESTART(_cmd, _result, _clear) \
-  _INTERRUPTIBLE( JavaThread* _thread = (JavaThread*)ThreadLocalStorage::thread(),_result = _cmd, _result, _thread, _clear, , , UseVMInterruptibleIO)
-
-#else
-
-// This adds an assertion that it is only called from thread_in_native
-// The call overhead is skipped for performance in product mode
-#define INTERRUPTIBLE_NORESTART(_cmd, _result, _clear) \
-  _INTERRUPTIBLE(JavaThread* _thread = os::Solaris::setup_interruptible_native(), _result = _cmd, _result, _thread, _clear, , os::Solaris::cleanup_interruptible_native(_thread), UseVMInterruptibleIO )
-
-#endif
-
-// Don't attend to UseVMInterruptibleIO. Always allow interruption.
-// Also assumes that it is called from the _thread_blocked state.
-// Used by os_sleep().
-
-#define INTERRUPTIBLE_NORESTART_VM_ALWAYS(_cmd, _result, _thread, _clear) \
-  _INTERRUPTIBLE(os::Solaris::setup_interruptible_already_blocked(_thread), _result = _cmd, _result, _thread, _clear, , , true )
-
-#define INTERRUPTIBLE_RETURN_INT(_cmd, _clear) do { \
-  int _result; \
-  do { \
-    INTERRUPTIBLE(_cmd, _result, _clear); \
-  } while((_result == OS_ERR) && (errno == EINTR)); \
-  return _result; \
-} while(false)
-
-#define INTERRUPTIBLE_RETURN_INT_VM(_cmd, _clear) do { \
-  int _result; \
-  do { \
-    INTERRUPTIBLE_VM(_cmd, _result, _clear); \
-  } while((_result == OS_ERR) && (errno == EINTR)); \
-  return _result; \
-} while(false)
-
-#define INTERRUPTIBLE_RETURN_INT_NORESTART(_cmd, _clear) do { \
-  int _result; \
-  INTERRUPTIBLE_NORESTART(_cmd, _result, _clear); \
-  return _result; \
-} while(false)
-
-/* Use the RESTARTABLE macros when interruptible io is not needed */
+// macros for restartable system calls

 #define RESTARTABLE(_cmd, _result) do { \
  do { \
--- a/hotspot/src/os/windows/vm/os_windows.cpp
+++ b/hotspot/src/os/windows/vm/os_windows.cpp
@ -2425,6 +2425,12 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) {
    }
  }

+  if ((exception_code == EXCEPTION_ACCESS_VIOLATION) &&
+      VM_Version::is_cpuinfo_segv_addr(pc)) {
+    // Verify that the OS saves/restores AVX registers.
+    return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr());
+  }
+
  if (t != NULL && t->is_Java_thread()) {
    JavaThread* thread = (JavaThread*) t;
    bool in_java = thread->thread_state() == _thread_in_Java;
--- a/hotspot/src/os_cpu/aix_ppc/vm/os_aix_ppc.cpp
+++ b/hotspot/src/os_cpu/aix_ppc/vm/os_aix_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -49,8 +49,8 @@
 #include "runtime/osThread.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/timer.hpp"
-#include "thread_aix.inline.hpp"
 #include "utilities/events.hpp"
 #include "utilities/vmError.hpp"
 #ifdef COMPILER1
--- a/hotspot/src/os_cpu/aix_ppc/vm/threadLS_aix_ppc.cpp
+++ b/hotspot/src/os_cpu/aix_ppc/vm/threadLS_aix_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -25,14 +25,14 @@

 #include "precompiled.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#include "thread_aix.inline.hpp"
+#include "runtime/thread.hpp"

 void ThreadLocalStorage::generate_code_for_get_thread() {
-    // nothing we can do here for user-level thread
+  // Nothing we can do here for user-level thread.
 }

 void ThreadLocalStorage::pd_init() {
-  // Nothing to do
+  // Nothing to do.
 }

 void ThreadLocalStorage::pd_set_thread(Thread* thread) {
--- a/hotspot/src/os_cpu/aix_ppc/vm/thread_aix_ppc.cpp
+++ b/hotspot/src/os_cpu/aix_ppc/vm/thread_aix_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,8 +24,8 @@
 */

 #include "precompiled.hpp"
-#include "runtime/frame.inline.hpp"
-#include "thread_aix.inline.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/thread.hpp"

 // Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Aix/PPC.
 bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
--- a/hotspot/src/os_cpu/bsd_x86/vm/globals_bsd_x86.hpp
+++ b/hotspot/src/os_cpu/bsd_x86/vm/globals_bsd_x86.hpp
@ -42,7 +42,6 @@ define_pd_global(intx, VMThreadStackSize,        512);
 #endif // AMD64

 define_pd_global(intx, CompilerThreadStackSize,  0);
-define_pd_global(uintx, SurvivorRatio,           8);

 define_pd_global(uintx, JVMInvokeMethodSlack,    8192);

--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp
@ -492,6 +492,11 @@ JVM_handle_bsd_signal(int sig,
      }
    }

+    if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that the OS saves/restores AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
    // We test if stub is already set (by the stack overflow code
    // above) so it is not overwritten by the code that follows. This
    // check is not required on other platforms, because on other
--- a/hotspot/src/os_cpu/linux_ppc/vm/thread_linux_ppc.cpp
+++ b/hotspot/src/os_cpu/linux_ppc/vm/thread_linux_ppc.cpp
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2014 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,8 +24,8 @@
 */

 #include "precompiled.hpp"
-#include "runtime/frame.inline.hpp"
-#include "thread_linux.inline.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/thread.hpp"

 // Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Linux/PPC.
 bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
--- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
@ -338,6 +338,11 @@ JVM_handle_linux_signal(int sig,
      }
    }

+    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that the OS saves/restores AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
    if (thread->thread_state() == _thread_in_Java) {
      // Java thread running in Java code => find exception handler if any
      // a fault inside compiled code, the interpreter, or a stub
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
@ -459,6 +459,11 @@ JVM_handle_solaris_signal(int sig, siginfo_t* info, void* ucVoid,
      }
    }

+    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
+      // Verify that the OS saves/restores AVX registers.
+      stub = VM_Version::cpuinfo_cont_addr();
+    }
+
    if (thread->thread_state() == _thread_in_vm) {
      if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) {
        stub = StubRoutines::handler_for_unsafe_access();
--- a/hotspot/src/share/vm/adlc/output_c.cpp
+++ b/hotspot/src/share/vm/adlc/output_c.cpp
@ -1582,6 +1582,8 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {

      if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) {
        fprintf(fp, "  ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt);
      }

      // Fill in the bottom_type where requested
@ -3963,6 +3965,8 @@ void ArchDesc::buildMachNode(FILE *fp_cpp, InstructForm *inst, const char *inden
  }
  if( inst->is_ideal_fastlock() ) {
    fprintf(fp_cpp, "%s node->_counters = _leaf->as_FastLock()->counters();\n", indent);
+    fprintf(fp_cpp, "%s node->_rtm_counters = _leaf->as_FastLock()->rtm_counters();\n", indent);
+    fprintf(fp_cpp, "%s node->_stack_rtm_counters = _leaf->as_FastLock()->stack_rtm_counters();\n", indent);
  }

 }
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
@ -2526,7 +2526,7 @@ void LIRGenerator::do_Goto(Goto* x) {
    // need to free up storage used for OSR entry point
    LIR_Opr osrBuffer = block()->next()->operand();
    BasicTypeList signature;
-    signature.append(T_INT);
+    signature.append(NOT_LP64(T_INT) LP64_ONLY(T_LONG)); // pass a pointer to osrBuffer
    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
    __ move(osrBuffer, cc->args()->at(0));
    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_end),
--- a/hotspot/src/share/vm/c1/c1_globals.hpp
+++ b/hotspot/src/share/vm/c1/c1_globals.hpp
@ -308,9 +308,6 @@
  develop(intx, InstructionCountCutoff, 37000,                              \
          "If GraphBuilder adds this many instructions, bails out")         \
                                                                            \
-  product_pd(intx, SafepointPollOffset,                                     \
-          "Offset added to polling address (Intel only)")                   \
-                                                                            \
  develop(bool, ComputeExactFPURegisterUsage, true,                         \
          "Compute additional live set for fpu registers to simplify fpu stack merge (Intel only)") \
                                                                            \
--- a/hotspot/src/share/vm/ci/ciEnv.cpp
+++ b/hotspot/src/share/vm/ci/ciEnv.cpp
@ -961,7 +961,8 @@ void ciEnv::register_method(ciMethod* target,
                            AbstractCompiler* compiler,
                            int comp_level,
                            bool has_unsafe_access,
-                            bool has_wide_vectors) {
+                            bool has_wide_vectors,
+                            RTMState  rtm_state) {
  VM_ENTRY_MARK;
  nmethod* nm = NULL;
  {
@ -1002,6 +1003,15 @@ void ciEnv::register_method(ciMethod* target,

    methodHandle method(THREAD, target->get_Method());

+#if INCLUDE_RTM_OPT
+    if (!failing() && (rtm_state != NoRTM) &&
+        (method()->method_data() != NULL) &&
+        (method()->method_data()->rtm_state() != rtm_state)) {
+      // Preemptive decompile if rtm state was changed.
+      record_failure("RTM state change invalidated rtm code");
+    }
+#endif
+
    if (failing()) {
      // While not a true deoptimization, it is a preemptive decompile.
      MethodData* mdo = method()->method_data();
@ -1028,13 +1038,15 @@ void ciEnv::register_method(ciMethod* target,
                               frame_words, oop_map_set,
                               handler_table, inc_table,
                               compiler, comp_level);
-
    // Free codeBlobs
    code_buffer->free_blob();

    if (nm != NULL) {
      nm->set_has_unsafe_access(has_unsafe_access);
      nm->set_has_wide_vectors(has_wide_vectors);
+#if INCLUDE_RTM_OPT
+      nm->set_rtm_state(rtm_state);
+#endif

      // Record successful registration.
      // (Put nm into the task handle *before* publishing to the Java heap.)
--- a/hotspot/src/share/vm/ci/ciEnv.hpp
+++ b/hotspot/src/share/vm/ci/ciEnv.hpp
@ -365,7 +365,8 @@ public:
                       AbstractCompiler*         compiler,
                       int                       comp_level,
                       bool                      has_unsafe_access,
-                       bool                      has_wide_vectors);
+                       bool                      has_wide_vectors,
+                       RTMState                  rtm_state = NoRTM);


  // Access to certain well known ciObjects.
--- a/hotspot/src/share/vm/ci/ciMethodData.hpp
+++ b/hotspot/src/share/vm/ci/ciMethodData.hpp
@ -478,6 +478,18 @@ public:

  int invocation_count() { return _invocation_counter; }
  int backedge_count()   { return _backedge_counter;   }
+
+#if INCLUDE_RTM_OPT
+  // RTM state for this method's profile: NoRTM while is_empty() holds,
+  // otherwise the state reported by the underlying MethodData.
+  int rtm_state() {
+    if (!is_empty()) {
+      return get_MethodData()->rtm_state();
+    }
+    return NoRTM;
+  }
+#endif
+
  // Transfer information about the method to MethodData*.
  // would_profile means we would like to profile this method,
  // meaning it's not trivial.
--- a/hotspot/src/share/vm/classfile/javaClasses.hpp
+++ b/hotspot/src/share/vm/classfile/javaClasses.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -61,10 +61,6 @@ class java_lang_String : AllStatic {

  static Handle basic_create(int length, TRAPS);

-  static void set_value( oop string, typeArrayOop buffer) {
-    assert(initialized, "Must be initialized");
-    string->obj_field_put(value_offset,  (oop)buffer);
-  }
  static void set_offset(oop string, int offset) {
    assert(initialized, "Must be initialized");
    if (offset_offset > 0) {
@ -122,12 +118,26 @@ class java_lang_String : AllStatic {
    return hash_offset;
  }

+  // Stores 'buffer' into the object field of 'string' located at value_offset.
+  // Asserts that the class is initialized and that value_offset is positive.
+  static void set_value(oop string, typeArrayOop buffer) {
+    assert(initialized && (value_offset > 0), "Must be initialized");
+    string->obj_field_put(value_offset, (oop)buffer);
+  }
+  // Stores 'hash' into the int field of 'string' located at hash_offset.
+  // Asserts that the class is initialized and that hash_offset is positive.
+  static void set_hash(oop string, unsigned int hash) {
+    assert(initialized && (hash_offset > 0), "Must be initialized");
+    string->int_field_put(hash_offset, hash);
+  }
+
  // Accessors
  static typeArrayOop value(oop java_string) {
    assert(initialized && (value_offset > 0), "Must be initialized");
    assert(is_instance(java_string), "must be java_string");
    return (typeArrayOop) java_string->obj_field(value_offset);
  }
+  // Returns the int field at hash_offset of 'java_string' as an unsigned int.
+  // Asserts that the class is initialized, that hash_offset is positive and
+  // that 'java_string' passes is_instance().
+  static unsigned int hash(oop java_string) {
+    assert(initialized && (hash_offset > 0), "Must be initialized");
+    assert(is_instance(java_string), "must be java_string");
+    return java_string->int_field(hash_offset);
+  }
  static int offset(oop java_string) {
    assert(initialized, "Must be initialized");
    assert(is_instance(java_string), "must be java_string");
--- a/hotspot/src/share/vm/classfile/symbolTable.cpp
+++ b/hotspot/src/share/vm/classfile/symbolTable.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -35,6 +35,9 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#endif

 // --------------------------------------------------------------------------

@ -728,6 +731,15 @@ oop StringTable::intern(Handle string_or_null, jchar* name,
    string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
  }

+#if INCLUDE_ALL_GCS
+  if (G1StringDedup::is_enabled()) {
+    // Deduplicate the string before it is interned. Note that we should never
+    // deduplicate a string after it has been interned. Doing so will counteract
+    // compiler optimizations done on e.g. interned string literals.
+    G1StringDedup::deduplicate(string());
+  }
+#endif
+
  // Grab the StringTable_lock before getting the_table() because it could
  // change at safepoint.
  MutexLocker ml(StringTable_lock, THREAD);
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -497,6 +497,7 @@
  template(int_StringBuffer_signature,                "(I)Ljava/lang/StringBuffer;")                              \
  template(char_StringBuffer_signature,               "(C)Ljava/lang/StringBuffer;")                              \
  template(int_String_signature,                      "(I)Ljava/lang/String;")                                    \
+  template(codesource_permissioncollection_signature, "(Ljava/security/CodeSource;Ljava/security/PermissionCollection;)V") \
  /* signature symbols needed by intrinsics */                                                                    \
  VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
                                                                                                                  \
--- a/hotspot/src/share/vm/code/nmethod.cpp
+++ b/hotspot/src/share/vm/code/nmethod.cpp
@ -460,7 +460,9 @@ void nmethod::init_defaults() {
  _scavenge_root_link      = NULL;
  _scavenge_root_state     = 0;
  _compiler                = NULL;
-
+#if INCLUDE_RTM_OPT
+  _rtm_state               = NoRTM;
+#endif
 #ifdef HAVE_DTRACE_H
  _trap_offset             = 0;
 #endif // def HAVE_DTRACE_H
--- a/hotspot/src/share/vm/code/nmethod.hpp
+++ b/hotspot/src/share/vm/code/nmethod.hpp
@ -193,6 +193,12 @@ class nmethod : public CodeBlob {

  jbyte _scavenge_root_state;

+#if INCLUDE_RTM_OPT
+  // RTM state at compile time. Used during deoptimization to decide
+  // whether to restart collecting RTM locking abort statistic again.
+  RTMState _rtm_state;
+#endif
+
  // Nmethod Flushing lock. If non-zero, then the nmethod is not removed
  // and is not made into a zombie. However, once the nmethod is made into
  // a zombie, it will be locked one final time if CompiledMethodUnload
@ -414,6 +420,12 @@ class nmethod : public CodeBlob {
  bool  is_zombie() const                         { return _state == zombie; }
  bool  is_unloaded() const                       { return _state == unloaded;   }

+#if INCLUDE_RTM_OPT
+  // Getter and setter for the _rtm_state field of this nmethod.
+  RTMState  rtm_state() const                     { return _rtm_state; }
+  void set_rtm_state(RTMState state)              { _rtm_state = state; }
+#endif
+
  // Make the nmethod non entrant. The nmethod will continue to be
  // alive.  It is used when an uncommon trap happens.  Returns true
  // if this thread changed the state of the nmethod or false if
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@ -793,53 +793,6 @@ void CompactibleFreeListSpace::oop_iterate(ExtendedOopClosure* cl) {
  }
 }

-// Apply the given closure to each oop in the space \intersect memory region.
-void CompactibleFreeListSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  assert_lock_strong(freelistLock());
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), end());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(cl);
-    return;
-  }
-  assert(mr.end() <= end(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  SpaceMemRegionOopsIterClosure smr_blk(cl, mr);
-  if (block_is_obj(obj_addr)) {
-    // Handle first object specially.
-    oop obj = oop(obj_addr);
-    obj_addr += adjustObjectSize(obj->oop_iterate(&smr_blk));
-  } else {
-    FreeChunk* fc = (FreeChunk*)obj_addr;
-    obj_addr += fc->size();
-  }
-  while (obj_addr < t) {
-    HeapWord* obj = obj_addr;
-    obj_addr += block_size(obj_addr);
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(cl);
-      }
-    } else {
-      // "obj" extends beyond end of region
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(&smr_blk);
-      }
-      break;
-    }
-  }
-}
-
 // NOTE: In the following methods, in order to safely be able to
 // apply the closure to an object, we need to be sure that the
 // object has been initialized. We are guaranteed that an object
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
@ -351,7 +351,6 @@ class CompactibleFreeListSpace: public CompactibleSpace {
  Mutex* freelistLock() const { return &_freelistLock; }

  // Iteration support
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
  void oop_iterate(ExtendedOopClosure* cl);

  void object_iterate(ObjectClosure* blk);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -2496,7 +2496,8 @@ void CMSCollector::save_heap_summary() {
 }

 void CMSCollector::report_heap_summary(GCWhen::Type when) {
-  _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary, _last_metaspace_summary);
+  _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary);
+  _gc_tracer_cm->report_metaspace_summary(when, _last_metaspace_summary);
 }

 void CMSCollector::collect_in_foreground(bool clear_all_soft_refs, GCCause::Cause cause) {
@ -3162,16 +3163,6 @@ ConcurrentMarkSweepGeneration::younger_refs_iterate(OopsInGenClosure* cl) {
  cl->reset_generation();
 }

-void
-ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  if (freelistLock()->owned_by_self()) {
-    Generation::oop_iterate(mr, cl);
-  } else {
-    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
-    Generation::oop_iterate(mr, cl);
-  }
-}
-
 void
 ConcurrentMarkSweepGeneration::oop_iterate(ExtendedOopClosure* cl) {
  if (freelistLock()->owned_by_self()) {
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
@ -1285,7 +1285,6 @@ class ConcurrentMarkSweepGeneration: public CardGeneration {
  void save_sweep_limit();

  // More iteration support
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
  virtual void oop_iterate(ExtendedOopClosure* cl);
  virtual void safe_object_iterate(ObjectClosure* cl);
  virtual void object_iterate(ObjectClosure* cl);
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -1809,8 +1809,8 @@ class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
-  OldRegionSet* _old_proxy_set;
-  HumongousRegionSet* _humongous_proxy_set;
+  HeapRegionSetCount _old_regions_removed;
+  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;
@ -1819,19 +1819,19 @@ public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
-                             OldRegionSet* old_proxy_set,
-                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
-    _old_proxy_set(old_proxy_set),
-    _humongous_proxy_set(humongous_proxy_set),
+    _old_regions_removed(),
+    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
+  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
+  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
@ -1844,13 +1844,22 @@ public:
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_empty(hr,
-                              &_freed_bytes,
-                              _local_cleanup_list,
-                              _old_proxy_set,
-                              _humongous_proxy_set,
-                              _hrrs_cleanup_task,
-                              true /* par */);
+
+    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
+      _freed_bytes += hr->used();
+      hr->set_containing_set(NULL);
+      if (hr->isHumongous()) {
+        assert(hr->startsHumongous(), "we should only see starts humongous");
+        _humongous_regions_removed.increment(1u, hr->capacity());
+        _g1->free_humongous_region(hr, _local_cleanup_list, true);
+      } else {
+        _old_regions_removed.increment(1u, hr->capacity());
+        _g1->free_region(hr, _local_cleanup_list, true);
+      }
+    } else {
+      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
+    }
+
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
@ -1883,12 +1892,8 @@ public:
  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
-    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
-    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
-                                           &old_proxy_set,
-                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
@ -1900,13 +1905,10 @@ public:
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
-    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
-                                            NULL /* free_list */,
-                                            &old_proxy_set,
-                                            &humongous_proxy_set,
-                                            true /* par */);
+    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

@ -1920,14 +1922,14 @@ public:

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
-        HeapRegionLinkedListIterator iter(&local_cleanup_list);
+        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

-      _cleanup_list->add_as_tail(&local_cleanup_list);
+      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
@ -1971,7 +1973,6 @@ void ConcurrentMark::cleanup() {
    return;
  }

-  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
@ -2017,8 +2018,8 @@ void ConcurrentMark::cleanup() {
    // that calculated by walking the marking bitmap.

    // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), false);
-    BitMap expected_card_bm(_card_bm.size(), false);
+    BitMap expected_region_bm(_region_bm.size(), true);
+    BitMap expected_card_bm(_card_bm.size(), true);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
@ -2144,7 +2145,7 @@ void ConcurrentMark::completeCleanup() {

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

-  _cleanup_list.verify_optional();
+  _cleanup_list.verify_list();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
@ -2157,9 +2158,9 @@ void ConcurrentMark::completeCleanup() {
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
-    assert(hr != NULL, "the list was not empty");
+    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
-    tmp_free_list.add_as_tail(hr);
+    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
@ -2179,7 +2180,7 @@ void ConcurrentMark::completeCleanup() {

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
-        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
+        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

@ -2528,6 +2529,11 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
    assert(!rp->discovery_enabled(), "Post condition");
  }

+  if (has_overflown()) {
+    // We can not trust g1_is_alive if the marking stack overflowed
+    return;
+  }
+
  g1h->unlink_string_and_symbol_table(&g1_is_alive,
                                      /* process_strings */ false, // currently strings are always roots
                                      /* process_symbols */ true);
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP

-#include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/g1/heapRegionSet.hpp"
 #include "utilities/taskqueue.hpp"

 class G1CollectedHeap;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp
@ -24,6 +24,14 @@

 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1BiasedArray.hpp"
+#include "memory/padded.inline.hpp"
+
+// Generic allocator for the base array: requests length * elem_size u_char
+// elements (tagged mtGC) from PaddedPrimitiveArray::create_unfreeable().
+// Both arguments must be non-zero, which is checked by assert.
+address G1BiasedMappedArrayBase::create_new_base_array(size_t length, size_t elem_size) {
+  assert(length > 0, "just checking");
+  assert(elem_size > 0, "just checking");
+  size_t const num_units = length * elem_size;
+  return PaddedPrimitiveArray<u_char, mtGC>::create_unfreeable(num_units);
+}

 #ifndef PRODUCT
 void G1BiasedMappedArrayBase::verify_index(idx_t index) const {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp
@ -25,8 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP

+#include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
-#include "memory/allocation.inline.hpp"

 // Implements the common base functionality for arrays that contain provisions
 // for accessing its elements using a biased index.
@ -48,11 +48,7 @@ protected:
    _bias(0), _shift_by(0) { }

  // Allocate a new array, generic version.
-  static address create_new_base_array(size_t length, size_t elem_size) {
-    assert(length > 0, "just checking");
-    assert(elem_size > 0, "just checking");
-    return NEW_C_HEAP_ARRAY(u_char, length * elem_size, mtGC);
-  }
+  static address create_new_base_array(size_t length, size_t elem_size);

  // Initialize the members of this class. The biased start address of this array
  // is the bias (in elements) multiplied by the element size.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp
@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "code/nmethod.hpp"
+#include "gc_implementation/g1/g1CodeCacheRemSet.hpp"
+#include "memory/iterator.hpp"
+
+// A freshly constructed chunk is unlinked (no next/prev) and holds no
+// entries: _top starts out at bottom().
+G1CodeRootChunk::G1CodeRootChunk() : _next(NULL), _prev(NULL), _top(NULL) {
+  _top = bottom();
+}
+
+// Unlink the chunk and mark it as holding no entries.
+void G1CodeRootChunk::reset() {
+  _next = NULL;
+  _prev = NULL;
+  _top = bottom();
+}
+
+// Apply 'cl' to every entry stored in this chunk, from bottom() up to (but
+// not including) _top.
+void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) {
+  for (nmethod** entry = bottom(); entry != _top; entry++) {
+    cl->do_code_blob(*entry);
+  }
+}
+
+// Static free list of chunks: new_chunk() takes chunks from its head, and
+// free_chunk()/free_all_chunks() put chunks back onto it.
+FreeList<G1CodeRootChunk> G1CodeRootSet::_free_list;
+// Counter incremented by new_chunk() and decremented by free_chunk() and
+// free_all_chunks().
+size_t G1CodeRootSet::_num_chunks_handed_out = 0;
+
+// Hand out a reset chunk: reuse the head of the free list when there is one,
+// otherwise allocate a new chunk. Bumps the handed-out counter.
+G1CodeRootChunk* G1CodeRootSet::new_chunk() {
+  G1CodeRootChunk* chunk = _free_list.get_chunk_at_head();
+  if (chunk == NULL) {
+    chunk = new G1CodeRootChunk();
+  }
+  chunk->reset();
+  G1CodeRootSet::_num_chunks_handed_out++;
+  return chunk;
+}
+
+// Take 'chunk' back: it no longer counts as handed out and is put at the
+// head of the free list.
+void G1CodeRootSet::free_chunk(G1CodeRootChunk* chunk) {
+  G1CodeRootSet::_num_chunks_handed_out--;
+  _free_list.return_chunk_at_head(chunk);
+}
+
+// Take back every chunk on 'list' at once: subtract their number from the
+// handed-out counter and prepend the list to the free list.
+void G1CodeRootSet::free_all_chunks(FreeList<G1CodeRootChunk>* list) {
+  // Read the count before the list is handed to prepend().
+  size_t const num_returned = (size_t)list->count();
+  G1CodeRootSet::_num_chunks_handed_out -= num_returned;
+  _free_list.prepend(list);
+}
+
+// Delete surplus chunks from the free list. 'keep_ratio' is a percentage of
+// the number of chunks currently handed out; that many chunks are allowed to
+// stay on the free list and the rest are deleted.
+void G1CodeRootSet::purge_chunks(size_t keep_ratio) {
+  size_t const num_free = (size_t)_free_list.count();
+  size_t const num_to_keep = G1CodeRootSet::_num_chunks_handed_out * keep_ratio / 100;
+
+  if (num_to_keep >= num_free) {
+    // Nothing above the allowance, nothing to purge.
+    return;
+  }
+
+  // Move the surplus onto a temporary list, then delete it chunk by chunk.
+  FreeList<G1CodeRootChunk> surplus;
+  surplus.initialize();
+  surplus.set_size(G1CodeRootChunk::word_size());
+
+  _free_list.getFirstNChunksFromList(num_free - num_to_keep, &surplus);
+
+  for (G1CodeRootChunk* victim = surplus.get_chunk_at_head();
+       victim != NULL;
+       victim = surplus.get_chunk_at_head()) {
+    delete victim;
+  }
+}
+
+// Size of the static members of G1CodeRootSet (the free list object itself
+// and the handed-out counter), as reported by sizeof.
+size_t G1CodeRootSet::static_mem_size() {
+  size_t const free_list_size = sizeof(_free_list);
+  size_t const counter_size = sizeof(_num_chunks_handed_out);
+  return free_list_size + counter_size;
+}
+
+// Number of chunks on the free list multiplied by the free list's size().
+// NOTE(review): initialize() sets that size from G1CodeRootChunk::word_size(),
+// so this product looks like a word count rather than bytes -- confirm what
+// callers expect.
+size_t G1CodeRootSet::fl_mem_size() {
+  size_t const num_free = _free_list.count();
+  return num_free * _free_list.size();
+}
+
+// Set up the static free list and record the size of the chunks it holds.
+void G1CodeRootSet::initialize() {
+  size_t const chunk_words = G1CodeRootChunk::word_size();
+  _free_list.initialize();
+  _free_list.set_size(chunk_words);
+}
+
+// Creates a set with zero elements: _length is 0 and the chunk list is
+// initialized with G1CodeRootChunk::word_size() as its chunk size.
+G1CodeRootSet::G1CodeRootSet() : _list(), _length(0) {
+  _list.initialize();
+  _list.set_size(G1CodeRootChunk::word_size());
+}
+
+// Destroying a set clears it; clear() hands all of its chunks to
+// free_all_chunks().
+G1CodeRootSet::~G1CodeRootSet() {
+  clear();
+}
+
+// Add 'method' to the set unless it is already present. The entry goes into
+// the head chunk; a new chunk is put at the head first when there is no head
+// chunk yet or the current one is full.
+void G1CodeRootSet::add(nmethod* method) {
+  if (contains(method)) {
+    // Duplicates are ignored.
+    return;
+  }
+
+  bool const need_chunk = (_list.head() == NULL) || _list.head()->is_full();
+  if (need_chunk) {
+    _list.return_chunk_at_head(new_chunk());
+  }
+
+  bool added = _list.head()->add(method);
+  guarantee(added, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method));
+  _length++;
+}
+
+// Remove 'method' from the set if it is present; otherwise do nothing.
+void G1CodeRootSet::remove(nmethod* method) {
+  G1CodeRootChunk* holder = find(method);
+  if (holder != NULL) {
+    bool removed = holder->remove(method);
+    guarantee(removed, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method));
+    _length--;
+    if (holder->is_empty()) {
+      // The chunk just lost its last entry: unlink it and give it back.
+      _list.remove_chunk(holder);
+      free(holder);
+    }
+  }
+  assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method));
+}
+
+// Remove and return one entry from the set, or NULL when the set has no
+// chunks left. Head chunks that turn out to have nothing to pop are given
+// back along the way.
+nmethod* G1CodeRootSet::pop() {
+  while (true) {
+    G1CodeRootChunk* head = _list.head();
+    if (head == NULL) {
+      assert(_length == 0, "when there are no chunks, there should be no elements");
+      return NULL;
+    }
+
+    nmethod* popped = head->pop();
+    if (popped != NULL) {
+      _length--;
+      return popped;
+    }
+
+    // Nothing came out of the head chunk: detach it, give it back, try again.
+    free(_list.get_chunk_at_head());
+  }
+}
+
+// Walk the chunk list from the head and return the first chunk that contains
+// 'method', or NULL when no chunk does.
+G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) {
+  for (G1CodeRootChunk* chunk = _list.head();
+       chunk != NULL;
+       chunk = (G1CodeRootChunk*)chunk->next()) {
+    if (chunk->contains(method)) {
+      return chunk;
+    }
+  }
+  return NULL;
+}
+
+// Gives 'chunk' back by forwarding to free_chunk().
+void G1CodeRootSet::free(G1CodeRootChunk* chunk) {
+  free_chunk(chunk);
+}
+
+// True when some chunk of this set holds 'method'.
+bool G1CodeRootSet::contains(nmethod* method) {
+  G1CodeRootChunk* const holder = find(method);
+  return holder != NULL;
+}
+
+// Empties the set: passes the whole chunk list to free_all_chunks() and
+// resets the element count to zero.
+void G1CodeRootSet::clear() {
+  free_all_chunks(&_list);
+  _length = 0;
+}
+
+// Apply 'blk' to every entry of the set by visiting each chunk in list order.
+void G1CodeRootSet::nmethods_do(CodeBlobClosure* blk) const {
+  for (G1CodeRootChunk* chunk = _list.head();
+       chunk != NULL;
+       chunk = (G1CodeRootChunk*)chunk->next()) {
+    chunk->nmethods_do(blk);
+  }
+}
+
+// Memory attributed to this set: the G1CodeRootSet object itself plus the
+// chunks currently linked into _list.
+size_t G1CodeRootSet::mem_size() {
+  // sizeof(*this), not sizeof(this): the latter is only the size of a pointer
+  // and ignores the actual members of the set.
+  // NOTE(review): _list's size is set from G1CodeRootChunk::word_size(), so the
+  // second term looks like a word count rather than bytes -- confirm what
+  // callers expect before changing it.
+  return sizeof(*this) + _list.count() * _list.size();
+}
+
+#ifndef PRODUCT
+
+// Self-test for the chunk bookkeeping of G1CodeRootSet (compiled only when
+// PRODUCT is not defined). It checks duplicate detection in add(), that
+// pop() drains a set and gives its chunks back, and that purge_chunks() trims
+// the static free list, by asserting on _num_chunks_handed_out and
+// _free_list.count() after each step. The static state is restored at the end.
+void G1CodeRootSet::test() {
+  initialize();
+
+  assert(_free_list.count() == 0, "Free List must be empty");
+  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+
+  // The number of chunks that we allocate for purge testing.
+  size_t const num_chunks = 10;
+  {
+    G1CodeRootSet set1;
+    assert(set1.is_empty(), "Code root set must be initially empty but is not.");
+
+    set1.add((nmethod*)1);
+    assert(_num_chunks_handed_out == 1,
+           err_msg("Must have allocated and handed out one chunk, but handed out "
+                   SIZE_FORMAT" chunks", _num_chunks_handed_out));
+    assert(set1.length() == 1, err_msg("Added exactly one element, but set contains "
+                                       SIZE_FORMAT" elements", set1.length()));
+
+    // G1CodeRootChunk::word_size() is larger than G1CodeRootChunk::NUM_ENTRIES which
+    // we cannot access.
+    for (uint i = 0; i < G1CodeRootChunk::word_size() + 1; i++) {
+      set1.add((nmethod*)1);
+    }
+    assert(_num_chunks_handed_out == 1,
+           err_msg("Duplicate detection must have prevented allocation of further "
+                   "chunks but contains "SIZE_FORMAT, _num_chunks_handed_out));
+    assert(set1.length() == 1,
+           err_msg("Duplicate detection should not have increased the set size but "
+                   "is "SIZE_FORMAT, set1.length()));
+
+    size_t num_total_after_add = G1CodeRootChunk::word_size() + 1;
+    for (size_t i = 0; i < num_total_after_add - 1; i++) {
+      set1.add((nmethod*)(2 + i));
+    }
+    assert(_num_chunks_handed_out > 1,
+           "After adding more code roots, more than one chunks should have been handed out");
+    assert(set1.length() == num_total_after_add,
+           err_msg("After adding in total "SIZE_FORMAT" distinct code roots, they "
+                   "need to be in the set, but there are only "SIZE_FORMAT,
+                   num_total_after_add, set1.length()));
+
+    size_t num_popped = 0;
+    while (set1.pop() != NULL) {
+      num_popped++;
+    }
+    assert(num_popped == num_total_after_add,
+           err_msg("Managed to pop "SIZE_FORMAT" code roots, but only "SIZE_FORMAT" "
+                   "were added", num_popped, num_total_after_add));
+    assert(_num_chunks_handed_out == 0,
+           err_msg("After popping all elements, all chunks must have been returned "
+                   "but are still "SIZE_FORMAT, _num_chunks_handed_out));
+
+    purge_chunks(0);
+    // _free_list.count() is printed with SSIZE_FORMAT, matching the other
+    // messages in this function that print the same value.
+    assert(_free_list.count() == 0,
+           err_msg("After purging everything, the free list must be empty but still "
+                   "contains "SSIZE_FORMAT" chunks", _free_list.count()));
+
+    // Add some more handed out chunks.
+    size_t i = 0;
+    while (_num_chunks_handed_out < num_chunks) {
+      set1.add((nmethod*)i);
+      i++;
+    }
+
+    {
+      // Generate chunks on the free list.
+      G1CodeRootSet set2;
+      size_t j = 0;
+      while (_num_chunks_handed_out < num_chunks * 2) {
+        set2.add((nmethod*)j);
+        j++;
+      }
+      // Exit of the scope of the set2 object will call the destructor that generates
+      // num_chunks elements on the free list.
+    }
+
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Deletion of the second set must have resulted in giving back "
+                   "those, but there is still "SIZE_FORMAT" handed out, expecting "
+                   SIZE_FORMAT, _num_chunks_handed_out, num_chunks));
+    assert((size_t)_free_list.count() == num_chunks,
+           err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                   "but there are only "SSIZE_FORMAT, num_chunks, _free_list.count()));
+
+    size_t const test_percentage = 50;
+    purge_chunks(test_percentage);
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Purging must not hand out chunks but there are "SIZE_FORMAT,
+                   _num_chunks_handed_out));
+    // Both sides are compared as size_t; the right-hand side already is one.
+    assert((size_t)_free_list.count() == num_chunks * test_percentage / 100,
+           err_msg("Must have purged "SIZE_FORMAT" percent of "SIZE_FORMAT" chunks "
+                   "but there are "SSIZE_FORMAT, test_percentage, num_chunks,
+                   _free_list.count()));
+    // Purge the remainder of the chunks on the free list.
+    purge_chunks(0);
+    assert(_free_list.count() == 0, "Free List must be empty");
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Expected to be "SIZE_FORMAT" chunks handed out from the first set "
+                   "but there are "SIZE_FORMAT, num_chunks, _num_chunks_handed_out));
+
+    // Exit of the scope of the set1 object will call the destructor that generates
+    // num_chunks additional elements on the free list.
+  }
+
+  assert(_num_chunks_handed_out == 0,
+         err_msg("Deletion of the only set must have resulted in no chunks handed "
+                 "out, but there is still "SIZE_FORMAT" handed out", _num_chunks_handed_out));
+  assert((size_t)_free_list.count() == num_chunks,
+         err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                 "but there are only "SSIZE_FORMAT, num_chunks, _free_list.count()));
+
+  // Restore initial state.
+  purge_chunks(0);
+  assert(_free_list.count() == 0, "Free List must be empty");
+  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+}
+
+// Free-function entry point that runs G1CodeRootSet::test().
+void TestCodeCacheRemSet_test() {
+  G1CodeRootSet::test();
+}
+#endif
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp
@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/freeList.hpp"
+#include "runtime/globals.hpp"
+
+class CodeBlobClosure;
+
+// Fixed-capacity, array-backed container of nmethod pointers. It also carries
+// the _next/_prev links and the accessor methods that FreeList<G1CodeRootChunk>
+// uses, so chunks can be kept on such a list.
+// The stored entries occupy [bottom(), _top); _top is the slot the next add()
+// writes to.
+class G1CodeRootChunk : public CHeapObj<mtGC> {
+ private:
+  // Number of nmethod* slots in a single chunk.
+  static const int NUM_ENTRIES = 32;
+ public:
+  G1CodeRootChunk*     _next;
+  G1CodeRootChunk*     _prev;
+
+  // One past the last stored entry (see add() and pop()).
+  nmethod** _top;
+
+  nmethod* _data[NUM_ENTRIES];
+
+  // Address of the first slot of _data.
+  nmethod** bottom() const {
+    return (nmethod**) &(_data[0]);
+  }
+
+  // Address one past the last slot of _data.
+  nmethod** end() const {
+    return (nmethod**) &(_data[NUM_ENTRIES]);
+  }
+
+ public:
+  G1CodeRootChunk();
+  ~G1CodeRootChunk() {}
+
+  // sizeof(G1CodeRootChunk) aligned up to HeapWordSize, expressed in heap words.
+  static size_t word_size() { return (size_t)(align_size_up_(sizeof(G1CodeRootChunk), HeapWordSize) / HeapWordSize); }
+
+  // FreeList "interface" methods
+
+  G1CodeRootChunk* next() const         { return _next; }
+  G1CodeRootChunk* prev() const         { return _prev; }
+  // The setters assert that a chunk is never linked to itself.
+  void set_next(G1CodeRootChunk* v)     { _next = v; assert(v != this, "Boom");}
+  void set_prev(G1CodeRootChunk* v)     { _prev = v; assert(v != this, "Boom");}
+  void clear_next()       { set_next(NULL); }
+  void clear_prev()       { set_prev(NULL); }
+
+  size_t size() const { return word_size(); }
+
+  void link_next(G1CodeRootChunk* ptr)  { set_next(ptr); }
+  void link_prev(G1CodeRootChunk* ptr)  { set_prev(ptr); }
+  // Makes ptr the successor of this chunk and, when ptr is not NULL, makes
+  // this chunk the predecessor of ptr.
+  void link_after(G1CodeRootChunk* ptr) {
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Always reports true.
+  bool is_free()                 { return true; }
+
+  // New G1CodeRootChunk routines
+
+  void reset();
+
+  // True when no entry is stored.
+  bool is_empty() const {
+    return _top == bottom();
+  }
+
+  // True when every slot is in use.
+  bool is_full() const {
+    return _top == end();
+  }
+
+  // Linear search of the stored entries; true if method is among them.
+  bool contains(nmethod* method) const {
+    nmethod** cur = bottom();
+    while (cur != _top) {
+      if (*cur == method) return true;
+      cur++;
+    }
+    return false;
+  }
+
+  // Appends method without checking for duplicates.
+  // Returns false, storing nothing, when the chunk is full.
+  bool add(nmethod* method) {
+    if (is_full()) return false;
+    *_top = method;
+    _top++;
+    return true;
+  }
+
+  // Removes the first entry equal to method, keeping the order of the others.
+  // Returns true if an entry was removed, false if method was not found.
+  bool remove(nmethod* method) {
+    nmethod** cur = bottom();
+    while (cur != _top) {
+      if (*cur == method) {
+        // Close the gap by shifting the following entries down one slot.
+        // The elements are nmethod*, so that is the size to scale by.
+        memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod*));
+        _top--;
+        return true;
+      }
+      cur++;
+    }
+    return false;
+  }
+
+  void nmethods_do(CodeBlobClosure* blk);
+
+  // Removes and returns the most recently added entry, or NULL when empty.
+  nmethod* pop() {
+    if (is_empty()) {
+      return NULL;
+    }
+    _top--;
+    return *_top;
+  }
+};
+
+// Implements storage for a set of code roots.
+// All methods that modify the set are not thread-safe except if otherwise noted.
+class G1CodeRootSet VALUE_OBJ_CLASS_SPEC {
+ private:
+  // Global free chunk list management
+  static FreeList<G1CodeRootChunk> _free_list;
+  // Total number of chunks handed out
+  static size_t _num_chunks_handed_out;
+
+  static G1CodeRootChunk* new_chunk();
+  static void free_chunk(G1CodeRootChunk* chunk);
+  // Free all elements of the given list.
+  static void free_all_chunks(FreeList<G1CodeRootChunk>* list);
+
+  // Return the chunk that contains the given nmethod, NULL otherwise.
+  // Scans the list of chunks backwards, as this method is used to add new
+  // entries, which are typically added in bulk for a single nmethod.
+  G1CodeRootChunk* find(nmethod* method);
+  void free(G1CodeRootChunk* chunk);
+
+  // Number of elements in this set, as reported by length().
+  size_t _length;
+  // The chunks that belong to this set.
+  FreeList<G1CodeRootChunk> _list;
+
+ public:
+  G1CodeRootSet();
+  ~G1CodeRootSet();
+
+  static void initialize();
+  // Shrinks the global free chunk list. The unit test of this class expects
+  // keep_ratio to be the percentage of free chunks that stays on the list
+  // (50 keeps half of them, 0 empties the list).
+  static void purge_chunks(size_t keep_ratio);
+
+  static size_t static_mem_size();
+  static size_t fl_mem_size();
+
+  // Search for the code blob from the recently allocated ones to find duplicates more quickly, as this
+  // method is likely to be repeatedly called with the same nmethod.
+  void add(nmethod* method);
+
+  void remove(nmethod* method);
+  nmethod* pop();
+
+  bool contains(nmethod* method);
+
+  void clear();
+
+  void nmethods_do(CodeBlobClosure* blk) const;
+
+  // True when the set holds no elements. Const, like length(), so it can be
+  // queried through a const reference or pointer.
+  bool is_empty() const { return length() == 0; }
+
+  // Length in elements
+  size_t length() const { return _length; }
+
+  // Memory size in bytes taken by this set.
+  size_t mem_size();
+
+  // Self-test entry point.
+  // NOTE(review): PRODUCT_RETURN presumably supplies an empty body in product
+  // builds -- confirm against the macro definition.
+  static void test() PRODUCT_RETURN;
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@ -39,6 +39,7 @@
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
@ -169,14 +170,6 @@ public:
  int calls() { return _calls; }
 };

-class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
-public:
-  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
-    *card_ptr = CardTableModRefBS::dirty_card_val();
-    return true;
-  }
-};
-
 YoungList::YoungList(G1CollectedHeap* g1h) :
    _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
@ -524,7 +517,7 @@ G1CollectedHeap* G1CollectedHeap::_g1h;
 // Private methods.

 HeapRegion*
-G1CollectedHeap::new_region_try_secondary_free_list() {
+G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) {
  MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  while (!_secondary_free_list.is_empty() || free_regions_coming()) {
    if (!_secondary_free_list.is_empty()) {
@ -540,7 +533,7 @@ G1CollectedHeap::new_region_try_secondary_free_list() {

      assert(!_free_list.is_empty(), "if the secondary_free_list was not "
             "empty we should have moved at least one entry to the free_list");
-      HeapRegion* res = _free_list.remove_head();
+      HeapRegion* res = _free_list.remove_region(is_old);
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                               "allocated "HR_FORMAT" from secondary_free_list",
@ -562,7 +555,7 @@ G1CollectedHeap::new_region_try_secondary_free_list() {
  return NULL;
 }

-HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
+HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_expand) {
  assert(!isHumongous(word_size) || word_size <= HeapRegion::GrainWords,
         "the only time we use this to allocate a humongous region is "
         "when we are allocating a single humongous region");
@ -574,19 +567,21 @@ HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                               "forced to look at the secondary_free_list");
      }
-      res = new_region_try_secondary_free_list();
+      res = new_region_try_secondary_free_list(is_old);
      if (res != NULL) {
        return res;
      }
    }
  }
-  res = _free_list.remove_head_or_null();
+
+  res = _free_list.remove_region(is_old);
+
  if (res == NULL) {
    if (G1ConcRegionFreeingVerbose) {
      gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : "
                             "res == NULL, trying the secondary_free_list");
    }
-    res = new_region_try_secondary_free_list();
+    res = new_region_try_secondary_free_list(is_old);
  }
  if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
    // Currently, only attempts to allocate GC alloc regions set
@ -603,12 +598,9 @@ HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
    if (expand(word_size * HeapWordSize)) {
      // Given that expand() succeeded in expanding the heap, and we
      // always expand the heap by an amount aligned to the heap
-      // region size, the free list should in theory not be empty. So
-      // it would probably be OK to use remove_head(). But the extra
-      // check for NULL is unlikely to be a performance issue here (we
-      // just expanded the heap!) so let's just be conservative and
-      // use remove_head_or_null().
-      res = _free_list.remove_head_or_null();
+      // region size, the free list should in theory not be empty.
+      // In either case remove_region() will check for NULL.
+      res = _free_list.remove_region(is_old);
    } else {
      _expand_heap_after_alloc_failure = false;
    }
@ -626,7 +618,7 @@ uint G1CollectedHeap::humongous_obj_allocate_find_first(uint num_regions,
    // Only one region to allocate, no need to go through the slower
    // path. The caller will attempt the expansion if this fails, so
    // let's not try to expand here too.
-    HeapRegion* hr = new_region(word_size, false /* do_expand */);
+    HeapRegion* hr = new_region(word_size, true /* is_old */, false /* do_expand */);
    if (hr != NULL) {
      first = hr->hrs_index();
    } else {
@ -1298,7 +1290,6 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,

  size_t metadata_prev_used = MetaspaceAux::allocated_used_bytes();

-  HRSPhaseSetter x(HRSPhaseFullGC);
  verify_region_sets_optional();

  const bool do_clear_all_soft_refs = clear_all_soft_refs ||
@ -1928,10 +1919,10 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
  _g1mm(NULL),
  _refine_cte_cl(NULL),
  _full_collection(false),
-  _free_list("Master Free List"),
-  _secondary_free_list("Secondary Free List"),
-  _old_set("Old Set"),
-  _humongous_set("Master Humongous Set"),
+  _free_list("Master Free List", new MasterFreeRegionListMtSafeChecker()),
+  _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
+  _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
+  _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
  _free_regions_coming(false),
  _young_list(new YoungList(this)),
  _gc_time_stamp(0),
@ -1963,7 +1954,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
  int n_queues = MAX2((int)ParallelGCThreads, 1);
  _task_queues = new RefToScanQueueSet(n_queues);

-  int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
+  uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
  assert(n_rem_sets > 0, "Invariant.");

  _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC);
@ -2079,7 +2070,7 @@ jint G1CollectedHeap::initialize() {
  guarantee(HeapRegion::CardsPerRegion < max_cards_per_region,
            "too many cards per region");

-  HeapRegionSet::set_unrealistically_long_length(max_regions() + 1);
+  FreeRegionList::set_unrealistically_long_length(max_regions() + 1);

  _bot_shared = new G1BlockOffsetSharedArray(_reserved,
                                             heap_word_size(init_byte_size));
@ -2182,6 +2173,8 @@ jint G1CollectedHeap::initialize() {
  // values in the heap have been properly initialized.
  _g1mm = new G1MonitoringSupport(this);

+  G1StringDedup::initialize();
+
  return JNI_OK;
 }

@ -2369,8 +2362,12 @@ public:
 };

+// Applies a SumUsedClosure to the heap regions via heap_region_iterate() and
+// returns the closure's result. The time spent is recorded with the policy's
+// phase times.
 size_t G1CollectedHeap::recalculate_used() const {
+  double recalculate_used_start = os::elapsedTime();
+
   SumUsedClosure blk;
   heap_region_iterate(&blk);
+
+  // NOTE(review): the duration is always recorded under the evacuation-failure
+  // counter, whichever caller invoked this -- confirm that is intended.
+  g1_policy()->phase_times()->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0);
   return blk.result();
 }

@ -3462,6 +3459,11 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) {
    if (!silent) gclog_or_tty->print("RemSet ");
    rem_set()->verify();

+    if (G1StringDedup::is_enabled()) {
+      if (!silent) gclog_or_tty->print("StrDedup ");
+      G1StringDedup::verify();
+    }
+
    if (failures) {
      gclog_or_tty->print_cr("Heap:");
      // It helps to have the per-region information in the output to
@ -3479,8 +3481,13 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) {
    }
    guarantee(!failures, "there should not have been any failures");
  } else {
-    if (!silent)
-      gclog_or_tty->print("(SKIPPING roots, heapRegionSets, heapRegions, remset) ");
+    if (!silent) {
+      gclog_or_tty->print("(SKIPPING Roots, HeapRegionSets, HeapRegions, RemSet");
+      if (G1StringDedup::is_enabled()) {
+        gclog_or_tty->print(", StrDedup");
+      }
+      gclog_or_tty->print(") ");
+    }
  }
 }

@ -3573,6 +3580,9 @@ void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
  st->cr();
  _cm->print_worker_threads_on(st);
  _cg1r->print_worker_threads_on(st);
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::print_worker_threads_on(st);
+  }
 }

 void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
@ -3581,6 +3591,9 @@ void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
  }
  tc->do_thread(_cmThread);
  _cg1r->threads_do(tc);
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::threads_do(tc);
+  }
 }

 void G1CollectedHeap::print_tracing_info() const {
@ -3887,7 +3900,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
  print_heap_before_gc();
  trace_heap_before_gc(_gc_tracer_stw);

-  HRSPhaseSetter x(HRSPhaseEvacuation);
  verify_region_sets_optional();
  verify_dirty_young_regions();

@ -4386,6 +4398,8 @@ void G1CollectedHeap::finalize_for_evac_failure() {
 void G1CollectedHeap::remove_self_forwarding_pointers() {
  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");

+  double remove_self_forwards_start = os::elapsedTime();
+
  G1ParRemoveSelfForwardPtrsTask rsfp_task(this);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
@ -4413,6 +4427,8 @@ void G1CollectedHeap::remove_self_forwarding_pointers() {
  }
  _objs_with_preserved_marks.clear(true);
  _preserved_marks_of_objs.clear(true);
+
+  g1_policy()->phase_times()->record_evac_fail_remove_self_forwards((os::elapsedTime() - remove_self_forwards_start) * 1000.0);
 }

 void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) {
@ -4634,9 +4650,7 @@ bool G1ParScanThreadState::verify_task(StarTask ref) const {
 #endif // ASSERT

 void G1ParScanThreadState::trim_queue() {
-  assert(_evac_cl != NULL, "not set");
  assert(_evac_failure_cl != NULL, "not set");
-  assert(_partial_scan_cl != NULL, "not set");

  StarTask ref;
  do {
@ -4727,6 +4741,12 @@ oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
  oop forward_ptr = old->forward_to_atomic(obj);
  if (forward_ptr == NULL) {
    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+
+    // alloc_purpose is just a hint to allocate() above, recheck the type of region
+    // we actually allocated from and update alloc_purpose accordingly
+    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
+    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
+
    if (g1p->track_object_age(alloc_purpose)) {
      // We could simply do obj->incr_age(). However, this causes a
      // performance issue. obj->incr_age() will first check whether
@ -4754,6 +4774,13 @@ oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
      obj->set_mark(m);
    }

+    if (G1StringDedup::is_enabled()) {
+      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
+                                             to_region->is_young(),
+                                             queue_num(),
+                                             obj);
+    }
+
    size_t* surv_young_words = surviving_young_words();
    surv_young_words[young_index] += word_sz;

@ -4832,55 +4859,6 @@ void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
 template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(oop* p);
 template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(narrowOop* p);

-template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
-  assert(has_partial_array_mask(p), "invariant");
-  oop from_obj = clear_partial_array_mask(p);
-
-  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
-  assert(from_obj->is_objArray(), "must be obj array");
-  objArrayOop from_obj_array = objArrayOop(from_obj);
-  // The from-space object contains the real length.
-  int length                 = from_obj_array->length();
-
-  assert(from_obj->is_forwarded(), "must be forwarded");
-  oop to_obj                 = from_obj->forwardee();
-  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
-  objArrayOop to_obj_array   = objArrayOop(to_obj);
-  // We keep track of the next start index in the length field of the
-  // to-space object.
-  int next_index             = to_obj_array->length();
-  assert(0 <= next_index && next_index < length,
-         err_msg("invariant, next index: %d, length: %d", next_index, length));
-
-  int start                  = next_index;
-  int end                    = length;
-  int remainder              = end - start;
-  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
-  if (remainder > 2 * ParGCArrayScanChunk) {
-    end = start + ParGCArrayScanChunk;
-    to_obj_array->set_length(end);
-    // Push the remainder before we process the range in case another
-    // worker has run out of things to do and can steal it.
-    oop* from_obj_p = set_partial_array_mask(from_obj);
-    _par_scan_state->push_on_queue(from_obj_p);
-  } else {
-    assert(length == end, "sanity");
-    // We'll process the final range for this object. Restore the length
-    // so that the heap remains parsable in case of evacuation failure.
-    to_obj_array->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
-  // Process indexes [start,end). It will also process the header
-  // along with the first chunk (i.e., the chunk with start == 0).
-  // Note that at this point the length field of to_obj_array is not
-  // correct given that we are using it to keep track of the next
-  // start index. oop_iterate_range() (thankfully!) ignores the length
-  // field and only relies on the start / end parameters.  It does
-  // however return the size of the object which will be incorrect. So
-  // we have to ignore it even if we wanted to use it.
-  to_obj_array->oop_iterate_range(&_scanner, start, end);
-}
-
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
  G1CollectedHeap*              _g1h;
@ -5022,13 +5000,9 @@ public:
      ReferenceProcessor*             rp = _g1h->ref_processor_stw();

      G1ParScanThreadState            pss(_g1h, worker_id, rp);
-      G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
      G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
-      G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);

-      pss.set_evac_closure(&scan_evac_cl);
      pss.set_evac_failure_closure(&evac_failure_cl);
-      pss.set_partial_scan_closure(&partial_scan_cl);

      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
      G1ParScanMetadataClosure       only_scan_metadata_cl(_g1h, &pss, rp);
@ -5270,6 +5244,33 @@ void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive
                           g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
                           g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
  }
+
+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::unlink(is_alive);
+  }
+}
+
+// Card table entry closure that unconditionally overwrites each card it is
+// given with the dirty card value.
+class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
+public:
+  // Stores CardTableModRefBS::dirty_card_val() into *card_ptr and always
+  // returns true; worker_i is not used.
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    return true;
+  }
+};
+
+// Applies RedirtyLoggedCardTableEntryFastClosure to all completed buffers of
+// the heap's dirty card queue set, then merges that set's buffer lists into
+// the JavaThread dirty card queue set. The elapsed time is recorded with the
+// policy's phase times. Must only be called when G1DeferredRSUpdate is set.
+void G1CollectedHeap::redirty_logged_cards() {
+  guarantee(G1DeferredRSUpdate, "Must only be called when using deferred RS updates.");
+  double redirty_logged_cards_start = os::elapsedTime();
+
+  // The closure has to be installed before the buffers are processed.
+  RedirtyLoggedCardTableEntryFastClosure redirty;
+  dirty_card_queue_set().set_closure(&redirty);
+  dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+
+  // Hand the buffers over; afterwards the heap's own set must hold no
+  // completed buffers.
+  DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
+  dcq.merge_bufferlists(&dirty_card_queue_set());
+  assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+
+  // Scaled by 1000.0 for the *_ms recorder (assumes os::elapsedTime() returns
+  // seconds -- TODO confirm).
+  g1_policy()->phase_times()->record_redirty_logged_cards_time_ms((os::elapsedTime() - redirty_logged_cards_start) * 1000.0);
 }

 // Weak Reference Processing support
@ -5453,14 +5454,9 @@ public:
    G1STWIsAliveClosure is_alive(_g1h);

    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);

-    pss.set_evac_closure(&scan_evac_cl);
    pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);

    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
@ -5565,13 +5561,9 @@ public:
    HandleMark   hm;

    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);

-    pss.set_evac_closure(&scan_evac_cl);
    pss.set_evac_failure_closure(&evac_failure_cl);
-    pss.set_partial_scan_closure(&partial_scan_cl);

    assert(pss.refs()->is_empty(), "both queue and overflow should be empty");

@ -5695,13 +5687,9 @@ void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) {
  // We do not embed a reference processor in the copying/scanning
  // closures while we're actually processing the discovered
  // reference objects.
-  G1ParScanHeapEvacClosure        scan_evac_cl(this, &pss, NULL);
  G1ParScanHeapEvacFailureClosure evac_failure_cl(this, &pss, NULL);
-  G1ParScanPartialArrayClosure    partial_scan_cl(this, &pss, NULL);

-  pss.set_evac_closure(&scan_evac_cl);
  pss.set_evac_failure_closure(&evac_failure_cl);
-  pss.set_partial_scan_closure(&partial_scan_cl);

  assert(pss.refs()->is_empty(), "pre-condition");

@ -5883,6 +5871,9 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
    G1STWIsAliveClosure is_alive(this);
    G1KeepAliveClosure keep_alive(this);
    JNIHandles::weak_oops_do(&is_alive, &keep_alive);
+    if (G1StringDedup::is_enabled()) {
+      G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
+    }
  }

  release_gc_alloc_regions(n_workers, evacuation_info);
@ -5900,6 +5891,8 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
  // strong code roots for a particular heap region.
  migrate_strong_code_roots();

+  purge_code_root_memory();
+
  if (g1_policy()->during_initial_mark_pause()) {
    // Reset the claim values set during marking the strong code roots
    reset_heap_region_claim_values();
@ -5926,41 +5919,15 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
  enqueue_discovered_references(n_workers);

  if (G1DeferredRSUpdate) {
-    RedirtyLoggedCardTableEntryFastClosure redirty;
-    dirty_card_queue_set().set_closure(&redirty);
-    dirty_card_queue_set().apply_closure_to_all_completed_buffers();
-
-    DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
-    dcq.merge_bufferlists(&dirty_card_queue_set());
-    assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+    redirty_logged_cards();
  }
  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }

-void G1CollectedHeap::free_region_if_empty(HeapRegion* hr,
-                                     size_t* pre_used,
-                                     FreeRegionList* free_list,
-                                     OldRegionSet* old_proxy_set,
-                                     HumongousRegionSet* humongous_proxy_set,
-                                     HRRSCleanupTask* hrrs_cleanup_task,
-                                     bool par) {
-  if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
-    if (hr->isHumongous()) {
-      assert(hr->startsHumongous(), "we should only see starts humongous");
-      free_humongous_region(hr, pre_used, free_list, humongous_proxy_set, par);
-    } else {
-      _old_set.remove_with_proxy(hr, old_proxy_set);
-      free_region(hr, pre_used, free_list, par);
-    }
-  } else {
-    hr->rem_set()->do_cleanup_work(hrrs_cleanup_task);
-  }
-}
-
 void G1CollectedHeap::free_region(HeapRegion* hr,
-                                  size_t* pre_used,
                                  FreeRegionList* free_list,
-                                  bool par) {
+                                  bool par,
+                                  bool locked) {
  assert(!hr->isHumongous(), "this is only for non-humongous regions");
  assert(!hr->is_empty(), "the region should not be empty");
  assert(free_list != NULL, "pre-condition");
@ -5971,72 +5938,58 @@ void G1CollectedHeap::free_region(HeapRegion* hr,
  if (!hr->is_young()) {
    _cg1r->hot_card_cache()->reset_card_counts(hr);
  }
-  *pre_used += hr->used();
-  hr->hr_clear(par, true /* clear_space */);
-  free_list->add_as_head(hr);
+  hr->hr_clear(par, true /* clear_space */, locked /* locked */);
+  free_list->add_ordered(hr);
 }

 void G1CollectedHeap::free_humongous_region(HeapRegion* hr,
-                                     size_t* pre_used,
                                     FreeRegionList* free_list,
-                                     HumongousRegionSet* humongous_proxy_set,
                                     bool par) {
  assert(hr->startsHumongous(), "this is only for starts humongous regions");
  assert(free_list != NULL, "pre-condition");
-  assert(humongous_proxy_set != NULL, "pre-condition");

-  size_t hr_used = hr->used();
  size_t hr_capacity = hr->capacity();
-  size_t hr_pre_used = 0;
-  _humongous_set.remove_with_proxy(hr, humongous_proxy_set);
  // We need to read this before we make the region non-humongous,
  // otherwise the information will be gone.
  uint last_index = hr->last_hc_index();
  hr->set_notHumongous();
-  free_region(hr, &hr_pre_used, free_list, par);
+  free_region(hr, free_list, par);

  uint i = hr->hrs_index() + 1;
  while (i < last_index) {
    HeapRegion* curr_hr = region_at(i);
    assert(curr_hr->continuesHumongous(), "invariant");
    curr_hr->set_notHumongous();
-    free_region(curr_hr, &hr_pre_used, free_list, par);
+    free_region(curr_hr, free_list, par);
    i += 1;
  }
-  assert(hr_pre_used == hr_used,
-         err_msg("hr_pre_used: "SIZE_FORMAT" and hr_used: "SIZE_FORMAT" "
-                 "should be the same", hr_pre_used, hr_used));
-  *pre_used += hr_pre_used;
 }

-void G1CollectedHeap::update_sets_after_freeing_regions(size_t pre_used,
-                                       FreeRegionList* free_list,
-                                       OldRegionSet* old_proxy_set,
-                                       HumongousRegionSet* humongous_proxy_set,
-                                       bool par) {
-  if (pre_used > 0) {
-    Mutex* lock = (par) ? ParGCRareEvent_lock : NULL;
-    MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
-    assert(_summary_bytes_used >= pre_used,
-           err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" "
-                   "should be >= pre_used: "SIZE_FORMAT,
-                   _summary_bytes_used, pre_used));
-    _summary_bytes_used -= pre_used;
+void G1CollectedHeap::remove_from_old_sets(const HeapRegionSetCount& old_regions_removed,
+                                       const HeapRegionSetCount& humongous_regions_removed) {
+  if (old_regions_removed.length() > 0 || humongous_regions_removed.length() > 0) {
+    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
+    _old_set.bulk_remove(old_regions_removed);
+    _humongous_set.bulk_remove(humongous_regions_removed);
  }
-  if (free_list != NULL && !free_list->is_empty()) {
+
+}
+
+void G1CollectedHeap::prepend_to_freelist(FreeRegionList* list) {
+  assert(list != NULL, "list can't be null");
+  if (!list->is_empty()) {
    MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
-    _free_list.add_as_head(free_list);
-  }
-  if (old_proxy_set != NULL && !old_proxy_set->is_empty()) {
-    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
-    _old_set.update_from_proxy(old_proxy_set);
-  }
-  if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) {
-    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
-    _humongous_set.update_from_proxy(humongous_proxy_set);
+    _free_list.add_ordered(list);
  }
 }

+// Subtracts bytes from _summary_bytes_used. Asserts beforehand that the
+// counter is at least bytes, so the subtraction cannot wrap around.
+void G1CollectedHeap::decrement_summary_bytes(size_t bytes) {
+  assert(_summary_bytes_used >= bytes,
+         err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" should be >= bytes: "SIZE_FORMAT,
+                  _summary_bytes_used, bytes));
+  _summary_bytes_used -= bytes;
+}
+
 class G1ParCleanupCTTask : public AbstractGangTask {
  G1SATBCardTableModRefBS* _ct_bs;
  G1CollectedHeap* _g1h;
@ -6194,7 +6147,7 @@ void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& e
      }
    }

-    rs_lengths += cur->rem_set()->occupied();
+    rs_lengths += cur->rem_set()->occupied_locked();

    HeapRegion* next = cur->next_in_collection_set();
    assert(cur->in_collection_set(), "bad CS");
@ -6227,7 +6180,8 @@ void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& e

      // And the region is empty.
      assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
-      free_region(cur, &pre_used, &local_free_list, false /* par */);
+      pre_used += cur->used();
+      free_region(cur, &local_free_list, false /* par */, true /* locked */);
    } else {
      cur->uninstall_surv_rate_group();
      if (cur->is_young()) {
@ -6255,10 +6209,8 @@ void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& e
    young_time_ms += elapsed_ms;
  }

-  update_sets_after_freeing_regions(pre_used, &local_free_list,
-                                    NULL /* old_proxy_set */,
-                                    NULL /* humongous_proxy_set */,
-                                    false /* par */);
+  prepend_to_freelist(&local_free_list);
+  decrement_summary_bytes(pre_used);
  policy->phase_times()->record_young_free_cset_time_ms(young_time_ms);
  policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
@ -6370,10 +6322,10 @@ bool G1CollectedHeap::check_young_list_empty(bool check_heap, bool check_sample)

 class TearDownRegionSetsClosure : public HeapRegionClosure {
 private:
-  OldRegionSet *_old_set;
+  HeapRegionSet *_old_set;

 public:
-  TearDownRegionSetsClosure(OldRegionSet* old_set) : _old_set(old_set) { }
+  TearDownRegionSetsClosure(HeapRegionSet* old_set) : _old_set(old_set) { }

  bool doHeapRegion(HeapRegion* r) {
    if (r->is_empty()) {
@ -6402,9 +6354,10 @@ void G1CollectedHeap::tear_down_region_sets(bool free_list_only) {
    TearDownRegionSetsClosure cl(&_old_set);
    heap_region_iterate(&cl);

-    // Need to do this after the heap iteration to be able to
-    // recognize the young regions and ignore them during the iteration.
-    _young_list->empty_list();
+    // Note that emptying the _young_list is postponed and instead done as
+    // the first step when rebuilding the regions sets again. The reason for
+    // this is that during a full GC string deduplication needs to know if
+    // a collected region was young or old when the full GC was initiated.
  }
  _free_list.remove_all();
 }
@ -6412,13 +6365,13 @@ void G1CollectedHeap::tear_down_region_sets(bool free_list_only) {
 class RebuildRegionSetsClosure : public HeapRegionClosure {
 private:
  bool            _free_list_only;
-  OldRegionSet*   _old_set;
+  HeapRegionSet*   _old_set;
  FreeRegionList* _free_list;
  size_t          _total_used;

 public:
  RebuildRegionSetsClosure(bool free_list_only,
-                           OldRegionSet* old_set, FreeRegionList* free_list) :
+                           HeapRegionSet* old_set, FreeRegionList* free_list) :
    _free_list_only(free_list_only),
    _old_set(old_set), _free_list(free_list), _total_used(0) {
    assert(_free_list->is_empty(), "pre-condition");
@ -6458,6 +6411,10 @@ public:
 void G1CollectedHeap::rebuild_region_sets(bool free_list_only) {
  assert_at_safepoint(true /* should_be_vm_thread */);

+  if (!free_list_only) {
+    _young_list->empty_list();
+  }
+
  RebuildRegionSetsClosure cl(free_list_only, &_old_set, &_free_list);
  heap_region_iterate(&cl);

@ -6493,6 +6450,7 @@ HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size,
  bool young_list_full = g1_policy()->is_young_list_full();
  if (force || !young_list_full) {
    HeapRegion* new_alloc_region = new_region(word_size,
+                                              false /* is_old */,
                                              false /* do_expand */);
    if (new_alloc_region != NULL) {
      set_region_short_lived_locked(new_alloc_region);
@ -6551,14 +6509,16 @@ HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
  assert(FreeList_lock->owned_by_self(), "pre-condition");

  if (count < g1_policy()->max_regions(ap)) {
+    bool survivor = (ap == GCAllocForSurvived);
    HeapRegion* new_alloc_region = new_region(word_size,
+                                              !survivor,
                                              true /* do_expand */);
    if (new_alloc_region != NULL) {
      // We really only need to do this for old regions given that we
      // should never scan survivors. But it doesn't hurt to do it
      // for survivors too.
      new_alloc_region->set_saved_mark();
-      if (ap == GCAllocForSurvived) {
+      if (survivor) {
        new_alloc_region->set_survivor();
        _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
      } else {
@ -6615,23 +6575,22 @@ void OldGCAllocRegion::retire_region(HeapRegion* alloc_region,

 class VerifyRegionListsClosure : public HeapRegionClosure {
 private:
-  FreeRegionList*     _free_list;
-  OldRegionSet*       _old_set;
-  HumongousRegionSet* _humongous_set;
-  uint                _region_count;
+  HeapRegionSet*   _old_set;
+  HeapRegionSet*   _humongous_set;
+  FreeRegionList*  _free_list;

 public:
-  VerifyRegionListsClosure(OldRegionSet* old_set,
-                           HumongousRegionSet* humongous_set,
-                           FreeRegionList* free_list) :
-    _old_set(old_set), _humongous_set(humongous_set),
-    _free_list(free_list), _region_count(0) { }
+  HeapRegionSetCount _old_count;
+  HeapRegionSetCount _humongous_count;
+  HeapRegionSetCount _free_count;

-  uint region_count() { return _region_count; }
+  VerifyRegionListsClosure(HeapRegionSet* old_set,
+                           HeapRegionSet* humongous_set,
+                           FreeRegionList* free_list) :
+    _old_set(old_set), _humongous_set(humongous_set), _free_list(free_list),
+    _old_count(), _humongous_count(), _free_count(){ }

  bool doHeapRegion(HeapRegion* hr) {
-    _region_count += 1;
-
    if (hr->continuesHumongous()) {
      return false;
    }
@ -6639,14 +6598,31 @@ public:
    if (hr->is_young()) {
      // TODO
    } else if (hr->startsHumongous()) {
-      _humongous_set->verify_next_region(hr);
+      assert(hr->containing_set() == _humongous_set, err_msg("Heap region %u is starts humongous but not in humongous set.", hr->region_num()));
+      _humongous_count.increment(1u, hr->capacity());
    } else if (hr->is_empty()) {
-      _free_list->verify_next_region(hr);
+      assert(hr->containing_set() == _free_list, err_msg("Heap region %u is empty but not on the free list.", hr->region_num()));
+      _free_count.increment(1u, hr->capacity());
    } else {
-      _old_set->verify_next_region(hr);
+      assert(hr->containing_set() == _old_set, err_msg("Heap region %u is old but not in the old set.", hr->region_num()));
+      _old_count.increment(1u, hr->capacity());
    }
    return false;
  }
+
+  // Cross-checks the lengths and capacities accumulated by doHeapRegion()
+  // against what each of the passed-in sets reports about itself, and
+  // fails a guarantee() on the first mismatch. In each message "Expected"
+  // is the set's own value and "actual" is the value counted during the
+  // heap iteration.
+  void verify_counts(HeapRegionSet* old_set, HeapRegionSet* humongous_set, FreeRegionList* free_list) {
+    guarantee(old_set->length() == _old_count.length(), err_msg("Old set count mismatch. Expected %u, actual %u.", old_set->length(), _old_count.length()));
+    guarantee(old_set->total_capacity_bytes() == _old_count.capacity(), err_msg("Old set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        old_set->total_capacity_bytes(), _old_count.capacity()));
+
+    guarantee(humongous_set->length() == _humongous_count.length(), err_msg("Hum set count mismatch. Expected %u, actual %u.", humongous_set->length(), _humongous_count.length()));
+    guarantee(humongous_set->total_capacity_bytes() == _humongous_count.capacity(), err_msg("Hum set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        humongous_set->total_capacity_bytes(), _humongous_count.capacity()));
+
+    guarantee(free_list->length() == _free_count.length(), err_msg("Free list count mismatch. Expected %u, actual %u.", free_list->length(), _free_count.length()));
+    guarantee(free_list->total_capacity_bytes() == _free_count.capacity(), err_msg("Free list capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT,
+        free_list->total_capacity_bytes(), _free_count.capacity()));
+  }
 };

 HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
@ -6662,16 +6638,14 @@ void G1CollectedHeap::verify_region_sets() {
  assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);

  // First, check the explicit lists.
-  _free_list.verify();
+  _free_list.verify_list();
  {
    // Given that a concurrent operation might be adding regions to
    // the secondary free list we have to take the lock before
    // verifying it.
    MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
-    _secondary_free_list.verify();
+    _secondary_free_list.verify_list();
  }
-  _old_set.verify();
-  _humongous_set.verify();

  // If a concurrent region freeing operation is in progress it will
  // be difficult to correctly attribute any free regions we come
@ -6694,16 +6668,10 @@ void G1CollectedHeap::verify_region_sets() {

  // Finally, make sure that the region accounting in the lists is
  // consistent with what we see in the heap.
-  _old_set.verify_start();
-  _humongous_set.verify_start();
-  _free_list.verify_start();

  VerifyRegionListsClosure cl(&_old_set, &_humongous_set, &_free_list);
  heap_region_iterate(&cl);
-
-  _old_set.verify_end();
-  _humongous_set.verify_end();
-  _free_list.verify_end();
+  cl.verify_counts(&_old_set, &_humongous_set, &_free_list);
 }

 // Optimized nmethod scanning
@ -6804,6 +6772,13 @@ void G1CollectedHeap::migrate_strong_code_roots() {
  g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms);
 }

+// Purges code root chunks via G1CodeRootSet::purge_chunks() and records
+// how long that took (in milliseconds) in the policy's phase times.
+void G1CollectedHeap::purge_code_root_memory() {
+  const double start_sec = os::elapsedTime();
+
+  G1CodeRootSet::purge_chunks(G1CodeRootsChunkCacheKeepPercent);
+
+  // os::elapsedTime() is treated as seconds here; scale to milliseconds.
+  const double elapsed_ms = (os::elapsedTime() - start_sec) * 1000.0;
+  g1_policy()->phase_times()->record_strong_code_root_purge_time(elapsed_ms);
+}
+
 // Mark all the code roots that point into regions *not* in the
 // collection set.
 //
@ -6874,7 +6849,7 @@ public:
      // Code roots should never be attached to a continuation of a humongous region
      assert(hrrs->strong_code_roots_list_length() == 0,
             err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT
-                     " starting at "HR_FORMAT", but has "INT32_FORMAT,
+                     " starting at "HR_FORMAT", but has "SIZE_FORMAT,
                     HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()),
                     hrrs->strong_code_roots_list_length()));
      return false;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegionSeq.hpp"
-#include "gc_implementation/g1/heapRegionSets.hpp"
+#include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/shared/hSpaceCounters.hpp"
 #include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
@ -243,18 +243,18 @@ private:
  MemRegion _g1_committed;

  // The master free list. It will satisfy all new region allocations.
-  MasterFreeRegionList      _free_list;
+  FreeRegionList _free_list;

  // The secondary free list which contains regions that have been
  // freed up during the cleanup process. This will be appended to the
  // master free list when appropriate.
-  SecondaryFreeRegionList   _secondary_free_list;
+  FreeRegionList _secondary_free_list;

  // It keeps track of the old regions.
-  MasterOldRegionSet        _old_set;
+  HeapRegionSet _old_set;

  // It keeps track of the humongous regions.
-  MasterHumongousRegionSet  _humongous_set;
+  HeapRegionSet _humongous_set;

  // The number of regions we could create by expansion.
  uint _expansion_regions;
@ -497,13 +497,14 @@ protected:
  // check whether there's anything available on the
  // secondary_free_list and/or wait for more regions to appear on
  // that list, if _free_regions_coming is set.
-  HeapRegion* new_region_try_secondary_free_list();
+  HeapRegion* new_region_try_secondary_free_list(bool is_old);

  // Try to allocate a single non-humongous HeapRegion sufficient for
  // an allocation of the given word_size. If do_expand is true,
  // attempt to expand the heap if necessary to satisfy the allocation
-  // request.
-  HeapRegion* new_region(size_t word_size, bool do_expand);
+  // request. If the region is to be used as an old region or for a
+  // humongous object, set is_old to true; otherwise set it to false.
+  HeapRegion* new_region(size_t word_size, bool is_old, bool do_expand);

  // Attempt to satisfy a humongous allocation request of the given
  // size by finding a contiguous set of free regions of num_regions
@ -757,6 +758,29 @@ public:

  G1HRPrinter* hr_printer() { return &_hr_printer; }

+  // Frees a non-humongous region by initializing its contents and
+  // adding it to the free list that's passed as a parameter (this is
+  // usually a local list which will be appended to the master free
+  // list later). The used bytes of the freed region are not reported
+  // back (there is no pre_used parameter); callers that need them must
+  // track them themselves. If par is true, the region's RSet will not
+  // be freed up. The assumption is that this will be done later.
+  // The locked parameter indicates if the caller has already taken
+  // care of proper synchronization. This may allow some optimizations.
+  void free_region(HeapRegion* hr,
+                   FreeRegionList* free_list,
+                   bool par,
+                   bool locked = false);
+
+  // Frees a humongous region by collapsing it into individual regions
+  // and calling free_region() for each of them. The freed regions
+  // will be added to the free list that's passed as a parameter (this
+  // is usually a local list which will be appended to the master free
+  // list later). The used bytes of the freed regions are not reported
+  // back (there is no pre_used parameter); callers that need them must
+  // track them themselves. If par is true, the region's RSet will not
+  // be freed up. The assumption is that this will be done later.
+  void free_humongous_region(HeapRegion* hr,
+                             FreeRegionList* free_list,
+                             bool par);
 protected:

  // Shrink the garbage-first heap by at most the given size (in bytes!).
@ -835,30 +859,6 @@ protected:
                               G1KlassScanClosure* scan_klasses,
                               int worker_i);

-  // Frees a non-humongous region by initializing its contents and
-  // adding it to the free list that's passed as a parameter (this is
-  // usually a local list which will be appended to the master free
-  // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
-  // up. The assumption is that this will be done later.
-  void free_region(HeapRegion* hr,
-                   size_t* pre_used,
-                   FreeRegionList* free_list,
-                   bool par);
-
-  // Frees a humongous region by collapsing it into individual regions
-  // and calling free_region() for each of them. The freed regions
-  // will be added to the free list that's passed as a parameter (this
-  // is usually a local list which will be appended to the master free
-  // list later). The used bytes of freed regions are accumulated in
-  // pre_used. If par is true, the region's RSet will not be freed
-  // up. The assumption is that this will be done later.
-  void free_humongous_region(HeapRegion* hr,
-                             size_t* pre_used,
-                             FreeRegionList* free_list,
-                             HumongousRegionSet* humongous_proxy_set,
-                             bool par);
-
  // Notifies all the necessary spaces that the committed space has
  // been updated (either expanded or shrunk). It should be called
  // after _g1_storage is updated.
@ -1228,21 +1228,17 @@ public:
  bool is_on_master_free_list(HeapRegion* hr) {
    return hr->containing_set() == &_free_list;
  }
-
-  bool is_in_humongous_set(HeapRegion* hr) {
-    return hr->containing_set() == &_humongous_set;
-  }
 #endif // ASSERT

  // Wrapper for the region list operations that can be called from
  // methods outside this class.

-  void secondary_free_list_add_as_tail(FreeRegionList* list) {
-    _secondary_free_list.add_as_tail(list);
+  void secondary_free_list_add(FreeRegionList* list) {
+    _secondary_free_list.add_ordered(list);
  }

  void append_secondary_free_list() {
-    _free_list.add_as_head(&_secondary_free_list);
+    _free_list.add_ordered(&_secondary_free_list);
  }

  void append_secondary_free_list_if_not_empty_with_lock() {
@ -1284,27 +1280,9 @@ public:
  // True iff an evacuation has failed in the most-recent collection.
  bool evacuation_failed() { return _evacuation_failed; }

-  // It will free a region if it has allocated objects in it that are
-  // all dead. It calls either free_region() or
-  // free_humongous_region() depending on the type of the region that
-  // is passed to it.
-  void free_region_if_empty(HeapRegion* hr,
-                            size_t* pre_used,
-                            FreeRegionList* free_list,
-                            OldRegionSet* old_proxy_set,
-                            HumongousRegionSet* humongous_proxy_set,
-                            HRRSCleanupTask* hrrs_cleanup_task,
-                            bool par);
-
-  // It appends the free list to the master free list and updates the
-  // master humongous list according to the contents of the proxy
-  // list. It also adjusts the total used bytes according to pre_used
-  // (if par is true, it will do so by taking the ParGCRareEvent_lock).
-  void update_sets_after_freeing_regions(size_t pre_used,
-                                       FreeRegionList* free_list,
-                                       OldRegionSet* old_proxy_set,
-                                       HumongousRegionSet* humongous_proxy_set,
-                                       bool par);
+  void remove_from_old_sets(const HeapRegionSetCount& old_regions_removed, const HeapRegionSetCount& humongous_regions_removed);
+  void prepend_to_freelist(FreeRegionList* list);
+  void decrement_summary_bytes(size_t bytes);

  // Returns "TRUE" iff "p" points into the committed areas of the heap.
  virtual bool is_in(const void* p) const;
@ -1659,6 +1637,9 @@ public:
  // that were not successfully evacuated are not migrated.
  void migrate_strong_code_roots();

+  // Free up superfluous code root memory.
+  void purge_code_root_memory();
+
  // During an initial mark pause, mark all the code roots that
  // point into regions *not* in the collection set.
  void mark_strong_code_roots(uint worker_id);
@ -1671,6 +1652,8 @@ public:
  // in symbol table, possibly in parallel.
  void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);

+  // Redirty logged cards in the refinement queue.
+  void redirty_logged_cards();
  // Verification

  // The following is just to alert the verification code
@ -1797,8 +1780,6 @@ protected:
  size_t           _undo_waste;

  OopsInHeapRegionClosure*      _evac_failure_cl;
-  G1ParScanHeapEvacClosure*     _evac_cl;
-  G1ParScanPartialArrayClosure* _partial_scan_cl;

  int  _hash_seed;
  uint _queue_num;
@ -1926,14 +1907,6 @@ public:
    return _evac_failure_cl;
  }

-  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
-    _evac_cl = evac_cl;
-  }
-
-  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
-    _partial_scan_cl = partial_scan_cl;
-  }
-
  int* hash_seed() { return &_hash_seed; }
  uint queue_num() { return _queue_num; }

@ -1981,19 +1954,121 @@ public:
                                                 false /* retain */);
    }
  }
+private:
+  #define G1_PARTIAL_ARRAY_MASK 0x2
+
+  // Returns true iff the G1_PARTIAL_ARRAY_MASK bit is set in the pointer
+  // value of ref, i.e. ref is a tagged partial-array task rather than a
+  // plain oop*.
+  inline bool has_partial_array_mask(oop* ref) const {
+    const uintptr_t tag_bits = (uintptr_t)ref & G1_PARTIAL_ARRAY_MASK;
+    return tag_bits == G1_PARTIAL_ARRAY_MASK;
+  }
+
+  // We never encode partial array oops as narrowOop*, so return false immediately.
+  // This allows the compiler to create optimized code when popping references from
+  // the work queue.
+  inline bool has_partial_array_mask(narrowOop* ref) const {
+    assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
+    return false;
+  }
+
+  // Only implement set_partial_array_mask() for regular oops, not for narrowOops.
+  // We always encode partial arrays as regular oop, to allow the
+  // specialization for has_partial_array_mask() for narrowOops above.
+  // This means that unintentional use of this method with narrowOops is caught
+  // by the compiler.
+  inline oop* set_partial_array_mask(oop obj) const {
+    assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
+    return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
+  }
+
+  // Inverse of set_partial_array_mask(): clears the G1_PARTIAL_ARRAY_MASK
+  // bit from ref and returns the result as an oop.
+  inline oop clear_partial_array_mask(oop* ref) const {
+    const intptr_t untagged = (intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK;
+    return cast_to_oop(untagged);
+  }
+
+  // Processes one chunk of an object array that is being scanned
+  // piecemeal. p is not a real field address: it is the from-space array
+  // itself with G1_PARTIAL_ARRAY_MASK set (see set_partial_array_mask()).
+  // The to-space copy's length field temporarily holds the index at which
+  // the next chunk starts; the from-space copy keeps the real length.
+  void do_oop_partial_array(oop* p) {
+    assert(has_partial_array_mask(p), "invariant");
+    oop from_obj = clear_partial_array_mask(p);
+
+    assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+    assert(from_obj->is_objArray(), "must be obj array");
+    objArrayOop from_obj_array = objArrayOop(from_obj);
+    // The from-space object contains the real length.
+    int length                 = from_obj_array->length();
+
+    assert(from_obj->is_forwarded(), "must be forwarded");
+    oop to_obj                 = from_obj->forwardee();
+    assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+    objArrayOop to_obj_array   = objArrayOop(to_obj);
+    // We keep track of the next start index in the length field of the
+    // to-space object.
+    int next_index             = to_obj_array->length();
+    assert(0 <= next_index && next_index < length,
+           err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+    int start                  = next_index;
+    int end                    = length;
+    int remainder              = end - start;
+    // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
+    if (remainder > 2 * ParGCArrayScanChunk) {
+      end = start + ParGCArrayScanChunk;
+      to_obj_array->set_length(end);
+      // Push the remainder before we process the range in case another
+      // worker has run out of things to do and can steal it.
+      oop* from_obj_p = set_partial_array_mask(from_obj);
+      push_on_queue(from_obj_p);
+    } else {
+      assert(length == end, "sanity");
+      // We'll process the final range for this object. Restore the length
+      // so that the heap remains parsable in case of evacuation failure.
+      to_obj_array->set_length(end);
+    }
+    _scanner.set_region(_g1h->heap_region_containing_raw(to_obj));
+    // Process indexes [start,end). It will also process the header
+    // along with the first chunk (i.e., the chunk with start == 0).
+    // Note that at this point the length field of to_obj_array is not
+    // correct given that we are using it to keep track of the next
+    // start index. oop_iterate_range() (thankfully!) ignores the length
+    // field and only relies on the start / end parameters.  It does
+    // however return the size of the object which will be incorrect. So
+    // we have to ignore it even if we wanted to use it.
+    to_obj_array->oop_iterate_range(&_scanner, start, end);
+  }
+
+  // Applied to each field of an object that has just been copied. If the
+  // field at p refers to an object in the collection set, the field is
+  // updated to point at that object's forwardee (copying the object first
+  // if nobody has done so yet). In all cases update_rs() is then called
+  // for the field, with from as the region passed in by the caller.
+  template <class T> void do_oop_evac(T* p, HeapRegion* from) {
+    assert(!oopDesc::is_null(oopDesc::load_decode_heap_oop(p)),
+           "Reference should not be NULL here as such are never pushed to the task queue.");
+    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
+
+    // References outside the collection set are never pushed on purpose,
+    // but (benign) races in the claim mechanism during RSet scanning can
+    // let several threads claim, and thus process, the same card. The
+    // collection-set check therefore has to be repeated here.
+    if (_g1h->in_cset_fast_test(obj)) {
+      oop forwardee = obj->is_forwarded() ? obj->forwardee()
+                                          : copy_to_survivor_space(obj);
+      assert(forwardee != NULL, "forwardee should not be NULL");
+      oopDesc::encode_store_heap_oop(p, forwardee);
+    }
+
+    assert(obj != NULL, "Must be");
+    update_rs(from, p, queue_num());
+  }
+public:

  oop copy_to_survivor_space(oop const obj);

  template <class T> void deal_with_reference(T* ref_to_scan) {
-    if (has_partial_array_mask(ref_to_scan)) {
-      _partial_scan_cl->do_oop_nv(ref_to_scan);
-    } else {
+    if (!has_partial_array_mask(ref_to_scan)) {
      // Note: we can use "raw" versions of "region_containing" because
      // "obj_to_scan" is definitely in the heap, and is not in a
      // humongous region.
      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-      _evac_cl->set_region(r);
-      _evac_cl->do_oop_nv(ref_to_scan);
+      do_oop_evac(ref_to_scan, r);
+    } else {
+      do_oop_partial_array((oop*)ref_to_scan);
    }
  }

--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
@ -30,6 +30,7 @@
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegionSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "utilities/taskqueue.hpp"

--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -27,6 +27,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"

 // Helper class for avoiding interleaved logging
 class LineBuffer: public StackObj {
@ -168,7 +169,9 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf")
+  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
+  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
+  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
 {
  assert(max_gc_threads > 0, "Must have some GC threads");
 }
@ -229,6 +232,16 @@ void G1GCPhaseTimes::note_gc_end() {
  _last_gc_worker_other_times_ms.verify();
 }

+void G1GCPhaseTimes::note_string_dedup_fixup_start() {
+  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
+  _cur_string_dedup_table_fixup_worker_times_ms.reset();
+}
+
+void G1GCPhaseTimes::note_string_dedup_fixup_end() {
+  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
+  _cur_string_dedup_table_fixup_worker_times_ms.verify();
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }
@ -250,6 +263,14 @@ double G1GCPhaseTimes::accounted_time_ms() {
    // Strong code root migration time
    misc_time_ms += _cur_strong_code_root_migration_time_ms;

+    // Strong code root purge time
+    misc_time_ms += _cur_strong_code_root_purge_time_ms;
+
+    if (G1StringDedup::is_enabled()) {
+      // String dedup fixup time
+      misc_time_ms += _cur_string_dedup_fixup_time_ms;
+    }
+
    // Subtract the time taken to clean the card table from the
    // current value of "other time"
    misc_time_ms += _cur_clear_ct_time_ms;
@ -299,20 +320,43 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
  }
  print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
  print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms);
+  print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
+  if (G1StringDedup::is_enabled()) {
+    print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
+    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
+    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
+  }
  print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
  double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
  print_stats(1, "Other", misc_time_ms);
  if (_cur_verify_before_time_ms > 0.0) {
    print_stats(2, "Verify Before", _cur_verify_before_time_ms);
  }
+  if (G1CollectedHeap::heap()->evacuation_failed()) {
+    double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards +
+      _cur_evac_fail_restore_remsets;
+    print_stats(2, "Evacuation Failure", evac_fail_handling);
+    if (G1Log::finest()) {
+      print_stats(3, "Recalculate Used", _cur_evac_fail_recalc_used);
+      print_stats(3, "Remove Self Forwards", _cur_evac_fail_remove_self_forwards);
+      print_stats(3, "Restore RemSet", _cur_evac_fail_restore_remsets);
+    }
+  }
  print_stats(2, "Choose CSet",
    (_recorded_young_cset_choice_time_ms +
    _recorded_non_young_cset_choice_time_ms));
  print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
  print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
+  if (G1DeferredRSUpdate) {
+    print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
+  }
  print_stats(2, "Free CSet",
    (_recorded_young_free_cset_time_ms +
    _recorded_non_young_free_cset_time_ms));
+  if (G1Log::finest()) {
+    print_stats(3, "Young Free CSet", _recorded_young_free_cset_time_ms);
+    print_stats(3, "Non-Young Free CSet", _recorded_non_young_free_cset_time_ms);
+  }
  if (_cur_verify_after_time_ms > 0.0) {
    print_stats(2, "Verify After", _cur_verify_after_time_ms);
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -131,6 +131,15 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
  double _cur_collection_par_time_ms;
  double _cur_collection_code_root_fixup_time_ms;
  double _cur_strong_code_root_migration_time_ms;
+  double _cur_strong_code_root_purge_time_ms;
+
+  double _cur_evac_fail_recalc_used;
+  double _cur_evac_fail_restore_remsets;
+  double _cur_evac_fail_remove_self_forwards;
+
+  double                  _cur_string_dedup_fixup_time_ms;
+  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
+  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;

  double _cur_clear_ct_time_ms;
  double _cur_ref_proc_time_ms;
@ -142,6 +151,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
  double _recorded_young_cset_choice_time_ms;
  double _recorded_non_young_cset_choice_time_ms;

+  double _recorded_redirty_logged_cards_time_ms;
+
  double _recorded_young_free_cset_time_ms;
  double _recorded_non_young_free_cset_time_ms;

@ -223,6 +234,37 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _cur_strong_code_root_migration_time_ms = ms;
  }

+  void record_strong_code_root_purge_time(double ms) {
+    _cur_strong_code_root_purge_time_ms = ms;
+  }
+
+  void record_evac_fail_recalc_used_time(double ms) {
+    _cur_evac_fail_recalc_used = ms;
+  }
+
+  void record_evac_fail_restore_remsets(double ms) {
+    _cur_evac_fail_restore_remsets = ms;
+  }
+
+  void record_evac_fail_remove_self_forwards(double ms) {
+    _cur_evac_fail_remove_self_forwards = ms;
+  }
+
+  void note_string_dedup_fixup_start();
+  void note_string_dedup_fixup_end();
+
+  void record_string_dedup_fixup_time(double ms) {
+    _cur_string_dedup_fixup_time_ms = ms;
+  }
+
+  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
+    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
+  }
+
+  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
+    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
+  }
+
  void record_ref_proc_time(double ms) {
    _cur_ref_proc_time_ms = ms;
  }
@ -251,6 +293,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _recorded_non_young_cset_choice_time_ms = time_ms;
  }

+  void record_redirty_logged_cards_time_ms(double time_ms) {
+    _recorded_redirty_logged_cards_time_ms = time_ms;
+  }
+
  void record_cur_collection_start_sec(double time_ms) {
    _cur_collection_start_sec = time_ms;
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -31,6 +31,7 @@
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
@ -194,17 +195,19 @@ class G1PrepareCompactClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ModRefBarrierSet* _mrbs;
  CompactPoint _cp;
-  HumongousRegionSet _humongous_proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;

  void free_humongous_region(HeapRegion* hr) {
    HeapWord* end = hr->end();
-    size_t dummy_pre_used;
    FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep");

    assert(hr->startsHumongous(),
           "Only the start of a humongous region should be freed.");
-    _g1h->free_humongous_region(hr, &dummy_pre_used, &dummy_free_list,
-                                &_humongous_proxy_set, false /* par */);
+
+    hr->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, hr->capacity());
+
+    _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
    hr->prepare_for_compaction(&_cp);
    // Also clear the part of the card table that will be unused after
    // compaction.
@ -217,16 +220,13 @@ public:
  : _g1h(G1CollectedHeap::heap()),
    _mrbs(_g1h->g1_barrier_set()),
    _cp(NULL, cs, cs->initialize_threshold()),
-    _humongous_proxy_set("G1MarkSweep Humongous Proxy Set") { }
+    _humongous_regions_removed() { }

  void update_sets() {
    // We'll recalculate total used bytes and recreate the free list
    // at the end of the GC, so no point in updating those values here.
-    _g1h->update_sets_after_freeing_regions(0, /* pre_used */
-                                            NULL, /* free_list */
-                                            NULL, /* old_proxy_set */
-                                            &_humongous_proxy_set,
-                                            false /* par */);
+    HeapRegionSetCount empty_set;
+    _g1h->remove_from_old_sets(empty_set, _humongous_regions_removed);
  }

  bool doHeapRegion(HeapRegion* hr) {
@ -317,6 +317,10 @@ void G1MarkSweep::mark_sweep_phase3() {
  // have been cleared if they pointed to non-surviving objects.)
  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);

+  if (G1StringDedup::is_enabled()) {
+    G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
+  }
+
  GenMarkSweep::adjust_marks();

  G1AdjustPointersClosure blk;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
@ -80,53 +80,6 @@ public:
  virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };

-#define G1_PARTIAL_ARRAY_MASK 0x2
-
-inline bool has_partial_array_mask(oop* ref) {
-  return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
-}
-
-// We never encode partial array oops as narrowOop*, so return false immediately.
-// This allows the compiler to create optimized code when popping references from
-// the work queue.
-inline bool has_partial_array_mask(narrowOop* ref) {
-  assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
-  return false;
-}
-
-// Only implement set_partial_array_mask() for regular oops, not for narrowOops.
-// We always encode partial arrays as regular oop, to allow the
-// specialization for has_partial_array_mask() for narrowOops above.
-// This means that unintentional use of this method with narrowOops are caught
-// by the compiler.
-inline oop* set_partial_array_mask(oop obj) {
-  assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
-  return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
-}
-
-template <class T> inline oop clear_partial_array_mask(T* ref) {
-  return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
-}
-
-class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
-  G1ParScanClosure _scanner;
-
-public:
-  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) :
-    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state, rp)
-  {
-    assert(_ref_processor == NULL, "sanity");
-  }
-
-  G1ParScanClosure* scanner() {
-    return &_scanner;
-  }
-
-  template <class T> void do_oop_nv(T* p);
-  virtual void do_oop(oop* p)       { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
-};
-
 // Add back base class for metadata
 class G1ParCopyHelper : public G1ParClosureSuper {
 protected:
@ -173,15 +126,8 @@ typedef G1ParCopyClosure<G1BarrierKlass, false> G1ParScanMetadataClosure;
 typedef G1ParCopyClosure<G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
 typedef G1ParCopyClosure<G1BarrierKlass, true> G1ParScanAndMarkMetadataClosure;

-// The following closure type is defined in g1_specialized_oop_closures.hpp:
-//
-// typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
 // We use a separate closure to handle references during evacuation
 // failure processing.
-// We could have used another instance of G1ParScanHeapEvacClosure
-// (since that closure no longer assumes that the references it
-// handles point into the collection set).

 typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;

--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@ -462,8 +462,9 @@ void G1RemSet::cleanup_after_oops_into_collection_set_do() {
  int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();

  if (_g1->evacuation_failed()) {
-    // Restore remembered sets for the regions pointing into the collection set.
+    double restore_remembered_set_start = os::elapsedTime();

+    // Restore remembered sets for the regions pointing into the collection set.
    if (G1DeferredRSUpdate) {
      // If deferred RS updates are enabled then we just need to transfer
      // the completed buffers from (a) the DirtyCardQueueSet used to hold
@ -482,6 +483,8 @@ void G1RemSet::cleanup_after_oops_into_collection_set_do() {
      }
      assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
    }
+
+    _g1->g1_policy()->phase_times()->record_evac_fail_restore_remsets((os::elapsedTime() - restore_remembered_set_start) * 1000.0);
  }

  // Free any completed buffers in the DirtyCardQueueSet used to hold cards
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "gc_implementation/g1/g1StringDedupThread.hpp"
+
+bool G1StringDedup::_enabled = false;
+
+void G1StringDedup::initialize() {
+  assert(UseG1GC, "String deduplication only available with G1");
+  if (UseStringDeduplication) {
+    _enabled = true;
+    G1StringDedupQueue::create();
+    G1StringDedupTable::create();
+    G1StringDedupThread::create();
+  }
+}
+
+bool G1StringDedup::is_candidate_from_mark(oop obj) {
+  if (java_lang_String::is_instance(obj)) {
+    bool from_young = G1CollectedHeap::heap()->heap_region_containing_raw(obj)->is_young();
+    if (from_young && obj->age() < StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to old but has not
+      // reached the deduplication age threshold, i.e. has not previously been a
+      // candidate during its life in the young generation.
+      return true;
+    }
+  }
+
+  // Not a candidate
+  return false;
+}
+
+void G1StringDedup::enqueue_from_mark(oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  if (is_candidate_from_mark(java_string)) {
+    G1StringDedupQueue::push(0 /* worker_id */, java_string);
+  }
+}
+
+bool G1StringDedup::is_candidate_from_evacuation(bool from_young, bool to_young, oop obj) {
+  if (from_young && java_lang_String::is_instance(obj)) {
+    if (to_young && obj->age() == StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to young and just
+      // reached the deduplication age threshold.
+      return true;
+    }
+    if (!to_young && obj->age() < StringDeduplicationAgeThreshold) {
+      // Candidate found. String is being evacuated from young to old but has not
+      // reached the deduplication age threshold, i.e. has not previously been a
+      // candidate during its life in the young generation.
+      return true;
+    }
+  }
+
+  // Not a candidate
+  return false;
+}
+
+void G1StringDedup::enqueue_from_evacuation(bool from_young, bool to_young, uint worker_id, oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  if (is_candidate_from_evacuation(from_young, to_young, java_string)) {
+    G1StringDedupQueue::push(worker_id, java_string);
+  }
+}
+
+void G1StringDedup::deduplicate(oop java_string) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupStat dummy; // Statistics from this path are never used
+  G1StringDedupTable::deduplicate(java_string, dummy);
+}
+
+void G1StringDedup::oops_do(OopClosure* keep_alive) {
+  assert(is_enabled(), "String deduplication not enabled");
+  unlink_or_oops_do(NULL, keep_alive);
+}
+
+void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
+  assert(is_enabled(), "String deduplication not enabled");
+  // Don't allow a potential resize or rehash during unlink, as the unlink
+  // operation itself might remove enough entries to invalidate such a decision.
+  unlink_or_oops_do(is_alive, NULL, false /* allow_resize_and_rehash */);
+}
+
+//
+// Task for parallel unlink_or_oops_do() operation on the deduplication queue
+// and table.
+//
+class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
+private:
+  G1StringDedupUnlinkOrOopsDoClosure _cl;
+
+public:
+  G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
+                                  OopClosure* keep_alive,
+                                  bool allow_resize_and_rehash) :
+    AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
+    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
+  }
+
+  virtual void work(uint worker_id) {
+    double queue_fixup_start = os::elapsedTime();
+    G1StringDedupQueue::unlink_or_oops_do(&_cl);
+
+    double table_fixup_start = os::elapsedTime();
+    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
+
+    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
+    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
+    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
+    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
+  }
+};
+
+void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
+  g1p->phase_times()->note_string_dedup_fixup_start();
+  double fixup_start = os::elapsedTime();
+
+  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->set_par_threads();
+    g1h->workers()->run_task(&task);
+    g1h->set_par_threads(0);
+  } else {
+    task.work(0);
+  }
+
+  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
+  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
+  g1p->phase_times()->note_string_dedup_fixup_end();
+}
+
+void G1StringDedup::threads_do(ThreadClosure* tc) {
+  assert(is_enabled(), "String deduplication not enabled");
+  tc->do_thread(G1StringDedupThread::thread());
+}
+
+void G1StringDedup::print_worker_threads_on(outputStream* st) {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupThread::thread()->print_on(st);
+  st->cr();
+}
+
+void G1StringDedup::verify() {
+  assert(is_enabled(), "String deduplication not enabled");
+  G1StringDedupQueue::verify();
+  G1StringDedupTable::verify();
+}
+
+G1StringDedupUnlinkOrOopsDoClosure::G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive,
+                                                                       OopClosure* keep_alive,
+                                                                       bool allow_resize_and_rehash) :
+  _is_alive(is_alive),
+  _keep_alive(keep_alive),
+  _resized_table(NULL),
+  _rehashed_table(NULL),
+  _next_queue(0),
+  _next_bucket(0) {
+  if (allow_resize_and_rehash) {
+    // If both a resize and a rehash are needed, only do the resize. A rehash
+    // of the table will eventually happen if the situation persists.
+    _resized_table = G1StringDedupTable::prepare_resize();
+    if (!is_resizing()) {
+      _rehashed_table = G1StringDedupTable::prepare_rehash();
+    }
+  }
+}
+
+G1StringDedupUnlinkOrOopsDoClosure::~G1StringDedupUnlinkOrOopsDoClosure() {
+  assert(!is_resizing() || !is_rehashing(), "Can not both resize and rehash");
+  if (is_resizing()) {
+    G1StringDedupTable::finish_resize(_resized_table);
+  } else if (is_rehashing()) {
+    G1StringDedupTable::finish_rehash(_rehashed_table);
+  }
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
+
+//
+// String Deduplication
+//
+// String deduplication aims to reduce the heap live-set by deduplicating identical
+// instances of String so that they share the same backing character array.
+//
+// The deduplication process is divided into two main parts, 1) finding the objects to
+// deduplicate, and 2) deduplicating those objects. The first part is done as part of
+// a normal GC cycle when objects are marked or evacuated. At this time a check is
+// applied on each object to check if it is a candidate for deduplication. If so, the
+// object is placed on the deduplication queue for later processing. The second part,
+// processing the objects on the deduplication queue, is a concurrent phase which
+// starts right after the stop-the-world marking/evacuation phase. This phase is
+// executed by the deduplication thread, which pulls deduplication candidates off the
+// deduplication queue and tries to deduplicate them.
+//
+// A deduplication hashtable is used to keep track of all unique character arrays
+// used by String objects. When deduplicating, a lookup is made in this table to see
+// if there is already an identical character array somewhere on the heap. If so, the
+// String object is adjusted to point to that character array, releasing the reference
+// to the original array allowing it to eventually be garbage collected. If the lookup
+// fails the character array is instead inserted into the hashtable so that this array
+// can be shared at some point in the future.
+//
+// Candidate selection
+//
+// An object is considered a deduplication candidate if all of the following
+// statements are true:
+//
+// - The object is an instance of java.lang.String
+//
+// - The object is being evacuated from a young heap region
+//
+// - The object is being evacuated to a young/survivor heap region and the
+//   object's age is equal to the deduplication age threshold
+//
+//   or
+//
+//   The object is being evacuated to an old heap region and the object's age is
+//   less than the deduplication age threshold
+//
+// Once a string object has been promoted to an old region, or its age is higher
+// than the deduplication age threshold, it will never become a candidate again.
+// This approach avoids making the same object a candidate more than once.
+//
+// Interned strings are a bit special. They are explicitly deduplicated just before
+// being inserted into the StringTable (to avoid counteracting C2 optimizations done
+// on string literals), then they also become deduplication candidates if they reach
+// the deduplication age threshold or are evacuated to an old heap region. The second
+// attempt to deduplicate such strings will be in vain, but we have no fast way of
+// filtering them out. This has not shown to be a problem, as the number of interned
+// strings is usually dwarfed by the number of normal (non-interned) strings.
+//
+// For additional information on string deduplication, please see JEP 192,
+// http://openjdk.java.net/jeps/192
+//
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+
+class OopClosure;
+class BoolObjectClosure;
+class ThreadClosure;
+class outputStream;
+class G1StringDedupTable;
+
+//
+// Main interface for interacting with string deduplication.
+//
+class G1StringDedup : public AllStatic {
+private:
+  // Single state for checking if both G1 and string deduplication are enabled.
+  static bool _enabled;
+
+  // Candidate selection policies; each returns true if the given object is
+  // a candidate for string deduplication.
+  static bool is_candidate_from_mark(oop obj);
+  static bool is_candidate_from_evacuation(bool from_young, bool to_young, oop obj);
+
+public:
+  // Returns true if both G1 and string deduplication are enabled.
+  static bool is_enabled() {
+    return _enabled;
+  }
+
+  static void initialize();
+
+  // Immediately deduplicates the given String object, bypassing
+  // the deduplication queue.
+  static void deduplicate(oop java_string);
+
+  // Enqueues a deduplication candidate for later processing by the deduplication
+  // thread. Before enqueuing, these functions apply the appropriate candidate
+  // selection policy to filter out non-candidates.
+  static void enqueue_from_mark(oop java_string);
+  static void enqueue_from_evacuation(bool from_young, bool to_young,
+                                      unsigned int queue, oop java_string);
+
+  static void oops_do(OopClosure* keep_alive);
+  static void unlink(BoolObjectClosure* is_alive);
+  static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
+                                bool allow_resize_and_rehash = true);
+
+  static void threads_do(ThreadClosure* tc);
+  static void print_worker_threads_on(outputStream* st);
+  static void verify();
+};
+
+//
+// This closure encapsulates the state and the closures needed when scanning
+// the deduplication queue and table during the unlink_or_oops_do() operation.
+// A single instance of this closure is created and then shared by all worker
+// threads participating in the scan. The _next_queue and _next_bucket fields
+// provide a simple mechanism for GC workers to claim exclusive access to a
+// queue or a table partition.
+//
+class G1StringDedupUnlinkOrOopsDoClosure : public StackObj {
+private:
+  BoolObjectClosure*  _is_alive;
+  OopClosure*         _keep_alive;
+  G1StringDedupTable* _resized_table;
+  G1StringDedupTable* _rehashed_table;
+  size_t              _next_queue;
+  size_t              _next_bucket;
+
+public:
+  G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive,
+                                     OopClosure* keep_alive,
+                                     bool allow_resize_and_rehash);
+  ~G1StringDedupUnlinkOrOopsDoClosure();
+
+  bool is_resizing() {
+    return _resized_table != NULL;
+  }
+
+  G1StringDedupTable* resized_table() {
+    return _resized_table;
+  }
+
+  bool is_rehashing() {
+    return _rehashed_table != NULL;
+  }
+
+  // Atomically claims the next available queue for exclusive access by
+  // the current thread. Returns the queue number of the claimed queue.
+  size_t claim_queue() {
+    return (size_t)Atomic::add_ptr(1, &_next_queue) - 1;
+  }
+
+  // Atomically claims the next available table partition for exclusive
+  // access by the current thread. Returns the table bucket number where
+  // the claimed partition starts.
+  size_t claim_table_partition(size_t partition_size) {
+    return (size_t)Atomic::add_ptr(partition_size, &_next_bucket) - partition_size;
+  }
+
+  // Applies and returns the result from the is_alive closure, or
+  // returns true if no such closure was provided.
+  bool is_alive(oop o) {
+    if (_is_alive != NULL) {
+      return _is_alive->do_object_b(o);
+    }
+    return true;
+  }
+
+  // Applies the keep_alive closure, or does nothing if no such
+  // closure was provided.
+  void keep_alive(oop* p) {
+    if (_keep_alive != NULL) {
+      _keep_alive->do_oop(p);
+    }
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupQueue.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupQueue.cpp
@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+#include "memory/gcLocker.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "utilities/stack.inline.hpp"
+
+G1StringDedupQueue* G1StringDedupQueue::_queue = NULL;
+const size_t        G1StringDedupQueue::_max_size = 1000000; // Max number of elements per queue
+const size_t        G1StringDedupQueue::_max_cache_size = 0; // Max cache size per queue
+
+G1StringDedupQueue::G1StringDedupQueue() :
+  _cursor(0),
+  _empty(true),
+  _dropped(0) {
+  _nqueues = MAX2(ParallelGCThreads, (size_t)1);
+  _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC);
+  for (size_t i = 0; i < _nqueues; i++) {
+    new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size);
+  }
+}
+
+G1StringDedupQueue::~G1StringDedupQueue() {
+  ShouldNotReachHere();
+}
+
+void G1StringDedupQueue::create() {
+  assert(_queue == NULL, "One string deduplication queue allowed");
+  _queue = new G1StringDedupQueue();
+}
+
+void G1StringDedupQueue::wait() {
+  MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag);
+  while (_queue->_empty) {
+    ml.wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void G1StringDedupQueue::push(uint worker_id, oop java_string) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint");
+  assert(worker_id < _queue->_nqueues, "Invalid queue");
+
+  // Push and notify waiter
+  G1StringDedupWorkerQueue& worker_queue = _queue->_queues[worker_id];
+  if (!worker_queue.is_full()) {
+    worker_queue.push(java_string);
+    if (_queue->_empty) {
+      MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag);
+      if (_queue->_empty) {
+        // Mark non-empty and notify waiter
+        _queue->_empty = false;
+        ml.notify();
+      }
+    }
+  } else {
+    // Queue is full, drop the string and update the statistics
+    Atomic::inc_ptr(&_queue->_dropped);
+  }
+}
+
+oop G1StringDedupQueue::pop() {
+  assert(!SafepointSynchronize::is_at_safepoint(), "Must not be at safepoint");
+  No_Safepoint_Verifier nsv;
+
+  // Try every queue once before giving up
+  for (size_t tries = 0; tries < _queue->_nqueues; tries++) {
+    // The cursor indicates where we left off last time
+    G1StringDedupWorkerQueue* queue = &_queue->_queues[_queue->_cursor];
+    while (!queue->is_empty()) {
+      oop obj = queue->pop();
+      // The oop we pop can be NULL if it was marked
+      // dead. Just ignore those and pop the next oop.
+      if (obj != NULL) {
+        return obj;
+      }
+    }
+
+    // This queue is drained; advance the cursor to the next one, wrapping around
+    _queue->_cursor = (_queue->_cursor + 1) % _queue->_nqueues;
+  }
+
+  // No queue yielded a non-NULL entry; mark empty
+  _queue->_empty = true;
+
+  return NULL;
+}
+
+void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl) {
+  // A worker thread first claims a queue, which ensures exclusive
+  // access to that queue, then continues to process it.
+  for (;;) {
+    // Grab next queue to scan
+    size_t queue = cl->claim_queue();
+    if (queue >= _queue->_nqueues) {
+      // End of queues
+      break;
+    }
+
+    // Scan the queue
+    unlink_or_oops_do(cl, queue);
+  }
+}
+
+void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue) {
+  assert(queue < _queue->_nqueues, "Invalid queue");
+  StackIterator<oop, mtGC> iter(_queue->_queues[queue]);
+  while (!iter.is_empty()) {
+    oop* p = iter.next_addr();
+    if (*p != NULL) {
+      if (cl->is_alive(*p)) {
+        cl->keep_alive(p);
+      } else {
+        // Clear dead reference
+        *p = NULL;
+      }
+    }
+  }
+}
+
+void G1StringDedupQueue::print_statistics(outputStream* st) {
+  st->print_cr( // Prints the "[Queue]" section: the count of strings dropped because a queue was full
+    "   [Queue]\n"
+    "      [Dropped: " UINTX_FORMAT "]", _queue->_dropped); // spaces keep C++11 from lexing the macro as a literal suffix
+}
+
+void G1StringDedupQueue::verify() {
+  for (size_t i = 0; i < _queue->_nqueues; i++) {
+    StackIterator<oop, mtGC> iter(_queue->_queues[i]);
+    while (!iter.is_empty()) {
+      oop obj = iter.next();
+      if (obj != NULL) {
+        guarantee(Universe::heap()->is_in_reserved(obj), "Object must be on the heap");
+        guarantee(!obj->is_forwarded(), "Object must not be forwarded");
+        guarantee(java_lang_String::is_instance(obj), "Object must be a String");
+      }
+    }
+  }
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupQueue.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupQueue.hpp
@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "utilities/stack.hpp"
+
+class G1StringDedupUnlinkOrOopsDoClosure;
+
+//
+// The deduplication queue acts as the communication channel between the stop-the-world
+// mark/evacuation phase and the concurrent deduplication phase. Deduplication candidates
+// found during mark/evacuation are placed on this queue for later processing in the
+// deduplication thread. A queue entry is an oop pointing to a String object (as opposed
+// to entries in the deduplication hashtable, which point to character arrays).
+//
+// While users of the queue treat it as a single queue, it is implemented as a set of
+// queues, one queue per GC worker thread, to allow lock-free and cache-friendly enqueue
+// operations by the GC workers.
+//
+// The oops in the queue are treated as weak pointers, meaning the objects they point to
+// can become unreachable and pruned (cleared) before being popped by the deduplication
+// thread.
+//
+// Pushing to the queue is thread safe (this relies on each thread using a unique worker
+// id), but only allowed during a safepoint. Popping from the queue is NOT thread safe
+// and can only be done by the deduplication thread outside a safepoint.
+//
+// The StringDedupQueue_lock is only used for blocking and waking up the deduplication
+// thread in case the queue is empty or becomes non-empty, respectively. This lock does
+// not otherwise protect the queue content.
+//
+class G1StringDedupQueue : public CHeapObj<mtGC> {
+private:
+  typedef Stack<oop, mtGC> G1StringDedupWorkerQueue; // Type of each individual worker queue
+
+  static G1StringDedupQueue* _queue;          // Instance that all static entry points operate on
+  static const size_t        _max_size;
+  static const size_t        _max_cache_size;
+
+  G1StringDedupWorkerQueue*  _queues;         // Array of worker queues, indexed 0.._nqueues-1
+  size_t                     _nqueues;        // Number of elements in _queues
+  size_t                     _cursor;         // Index of the queue pop() tries next, advanced modulo _nqueues
+  volatile bool              _empty;          // Set to true by pop() when it found nothing to return
+
+  // Statistics counter, only used for logging (printed as "Dropped" by print_statistics()).
+  uintx                      _dropped;
+
+  G1StringDedupQueue();
+  ~G1StringDedupQueue();
+
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue); // Processes one queue: keeps live entries, clears dead ones
+
+public:
+  static void create();
+
+  // Blocks and waits for the queue to become non-empty.
+  static void wait();
+
+  // Pushes a deduplication candidate onto a specific GC worker queue.
+  static void push(uint worker_id, oop java_string);
+
+  // Pops a deduplication candidate from any queue, returns NULL if
+  // all queues are empty.
+  static oop pop();
+
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl); // Claims queues through cl and processes each claimed queue
+
+  static void print_statistics(outputStream* st); // Prints the _dropped counter
+  static void verify();                           // Checks every non-NULL entry with guarantee()
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupStat.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupStat.cpp
@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+
+G1StringDedupStat::G1StringDedupStat() :
+  // Inspection counters
+  _inspected(0), _skipped(0),
+  _hashed(0), _known(0),
+  // New strings and their accumulated size in bytes
+  _new(0), _new_bytes(0),
+  // Deduplicated strings: overall, young and old
+  _deduped(0), _deduped_bytes(0),
+  _deduped_young(0), _deduped_young_bytes(0),
+  _deduped_old(0), _deduped_old_bytes(0),
+  // Phase transition counters
+  _idle(0),
+  _exec(0),
+  _block(0),
+  // Timestamp of the latest phase change
+  _start(0.0),
+  // Time accumulated per phase
+  _idle_elapsed(0.0),
+  _exec_elapsed(0.0),
+  _block_elapsed(0.0) {
+}
+
+void G1StringDedupStat::add(const G1StringDedupStat& stat) { // Accumulates stat into this object; _start is left untouched
+  _inspected           += stat._inspected;
+  _skipped             += stat._skipped;
+  _hashed              += stat._hashed;
+  _known               += stat._known;
+  _new                 += stat._new;
+  _new_bytes           += stat._new_bytes;
+  _deduped             += stat._deduped;
+  _deduped_bytes       += stat._deduped_bytes;
+  _deduped_young       += stat._deduped_young;
+  _deduped_young_bytes += stat._deduped_young_bytes;
+  _deduped_old         += stat._deduped_old;
+  _deduped_old_bytes   += stat._deduped_old_bytes;
+  _idle                += stat._idle;           // Phase counters
+  _exec                += stat._exec;
+  _block               += stat._block;
+  _idle_elapsed        += stat._idle_elapsed;   // Phase durations
+  _exec_elapsed        += stat._exec_elapsed;
+  _block_elapsed       += stat._block_elapsed;
+}
+
+void G1StringDedupStat::print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) {
+  // One line: last_stat new bytes -> new minus deduped bytes (deduped bytes), total_stat deduped/new percentage, last_stat exec time
+  double total_deduped_bytes_percent = 0.0;
+  if (total_stat._new_bytes > 0) {
+    // Avoid division by zero
+    total_deduped_bytes_percent = (double)total_stat._deduped_bytes / (double)total_stat._new_bytes * 100.0;
+  }
+  st->date_stamp(PrintGCDateStamps);
+  st->stamp(PrintGCTimeStamps);
+  // Format macros are separated from the literals by spaces; C++11 would otherwise lex them as literal suffixes
+  st->print_cr(
+    "[GC concurrent-string-deduplication, "
+    G1_STRDEDUP_BYTES_FORMAT_NS "->" G1_STRDEDUP_BYTES_FORMAT_NS "(" G1_STRDEDUP_BYTES_FORMAT_NS "), avg "
+    G1_STRDEDUP_PERCENT_FORMAT_NS ", " G1_STRDEDUP_TIME_FORMAT "]",
+    G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes),
+    G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes - last_stat._deduped_bytes),
+    G1_STRDEDUP_BYTES_PARAM(last_stat._deduped_bytes),
+    total_deduped_bytes_percent,
+    last_stat._exec_elapsed);
+}
+
+void G1StringDedupStat::print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total) {
+  // Prints the phase timing line (layout chosen by total) followed by the counter breakdown of stat.
+  // Every percentage stays 0.0 when its denominator is zero.
+  double skipped_percent             = 0.0;
+  double hashed_percent              = 0.0;
+  double known_percent               = 0.0;
+  double new_percent                 = 0.0;
+  double deduped_percent             = 0.0;
+  double deduped_bytes_percent       = 0.0;
+  double deduped_young_percent       = 0.0;
+  double deduped_young_bytes_percent = 0.0;
+  double deduped_old_percent         = 0.0;
+  double deduped_old_bytes_percent   = 0.0;
+
+  if (stat._inspected > 0) {
+    // Avoid division by zero
+    skipped_percent = (double)stat._skipped / (double)stat._inspected * 100.0;
+    hashed_percent  = (double)stat._hashed / (double)stat._inspected * 100.0;
+    known_percent   = (double)stat._known / (double)stat._inspected * 100.0;
+    new_percent     = (double)stat._new / (double)stat._inspected * 100.0;
+  }
+
+  if (stat._new > 0) {
+    // Avoid division by zero
+    deduped_percent = (double)stat._deduped / (double)stat._new * 100.0;
+  }
+
+  if (stat._deduped > 0) {
+    // Avoid division by zero
+    deduped_young_percent = (double)stat._deduped_young / (double)stat._deduped * 100.0;
+    deduped_old_percent   = (double)stat._deduped_old / (double)stat._deduped * 100.0;
+  }
+
+  if (stat._new_bytes > 0) {
+    // Avoid division by zero
+    deduped_bytes_percent = (double)stat._deduped_bytes / (double)stat._new_bytes * 100.0;
+  }
+
+  if (stat._deduped_bytes > 0) {
+    // Avoid division by zero
+    deduped_young_bytes_percent = (double)stat._deduped_young_bytes / (double)stat._deduped_bytes * 100.0;
+    deduped_old_bytes_percent   = (double)stat._deduped_old_bytes / (double)stat._deduped_bytes * 100.0;
+  }
+
+  if (total) {
+    st->print_cr(
+      "   [Total Exec: " UINTX_FORMAT "/" G1_STRDEDUP_TIME_FORMAT ", Idle: " UINTX_FORMAT "/" G1_STRDEDUP_TIME_FORMAT ", Blocked: " UINTX_FORMAT "/" G1_STRDEDUP_TIME_FORMAT "]",
+      stat._exec, stat._exec_elapsed, stat._idle, stat._idle_elapsed, stat._block, stat._block_elapsed);
+  } else {
+    st->print_cr(
+      "   [Last Exec: " G1_STRDEDUP_TIME_FORMAT ", Idle: " G1_STRDEDUP_TIME_FORMAT ", Blocked: " UINTX_FORMAT "/" G1_STRDEDUP_TIME_FORMAT "]",
+      stat._exec_elapsed, stat._idle_elapsed, stat._block, stat._block_elapsed);
+  }
+  st->print_cr(
+    "      [Inspected:    " G1_STRDEDUP_OBJECTS_FORMAT "]\n"
+    "         [Skipped:   " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]\n"
+    "         [Hashed:    " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]\n"
+    "         [Known:     " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]\n"
+    "         [New:       " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ") " G1_STRDEDUP_BYTES_FORMAT "]\n"
+    "      [Deduplicated: " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ") " G1_STRDEDUP_BYTES_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]\n"
+    "         [Young:     " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ") " G1_STRDEDUP_BYTES_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]\n"
+    "         [Old:       " G1_STRDEDUP_OBJECTS_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ") " G1_STRDEDUP_BYTES_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT ")]",
+    stat._inspected,
+    stat._skipped, skipped_percent,
+    stat._hashed, hashed_percent,
+    stat._known, known_percent,
+    stat._new, new_percent, G1_STRDEDUP_BYTES_PARAM(stat._new_bytes),
+    stat._deduped, deduped_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_bytes), deduped_bytes_percent,
+    stat._deduped_young, deduped_young_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_young_bytes), deduped_young_bytes_percent,
+    stat._deduped_old, deduped_old_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_old_bytes), deduped_old_bytes_percent);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupStat.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupStat.hpp
@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"
+
+// Macros for GC log output formatting
+#define G1_STRDEDUP_OBJECTS_FORMAT         UINTX_FORMAT_W(12)  // Object counter
+#define G1_STRDEDUP_TIME_FORMAT            "%1.7lf secs"       // Elapsed time, consumes one double
+#define G1_STRDEDUP_PERCENT_FORMAT         "%5.1lf%%"          // Percentage with field width, consumes one double
+#define G1_STRDEDUP_PERCENT_FORMAT_NS      "%.1lf%%"           // Same, without field width
+#define G1_STRDEDUP_BYTES_FORMAT           "%8.1lf%s"          // Scaled byte size plus unit string, with field width
+#define G1_STRDEDUP_BYTES_FORMAT_NS        "%.1lf%s"           // Same, without field width
+#define G1_STRDEDUP_BYTES_PARAM(bytes)     byte_size_in_proper_unit((double)(bytes)), proper_unit_for_byte_size((bytes)) // Expands to the two arguments of a BYTES format; evaluates bytes twice
+
+//
+// Statistics gathered by the deduplication thread.
+//
+class G1StringDedupStat : public StackObj {
+private:
+  // Counters
+  uintx  _inspected;            // Bumped by inc_inspected()
+  uintx  _skipped;              // Bumped by inc_skipped()
+  uintx  _hashed;               // Bumped by inc_hashed()
+  uintx  _known;                // Bumped by inc_known()
+  uintx  _new;                  // Bumped by inc_new()
+  uintx  _new_bytes;            // Sum of the byte sizes passed to inc_new()
+  uintx  _deduped;              // Bumped by both inc_deduped_young() and inc_deduped_old()
+  uintx  _deduped_bytes;        // Sum of the byte sizes passed to both inc_deduped_*() functions
+  uintx  _deduped_young;
+  uintx  _deduped_young_bytes;
+  uintx  _deduped_old;
+  uintx  _deduped_old_bytes;
+  uintx  _idle;                 // Number of mark_idle() calls
+  uintx  _exec;                 // Number of mark_exec() calls
+  uintx  _block;                // Number of mark_block() calls
+
+  // Time spent by the deduplication thread in different phases
+  double _start;                // os::elapsedTime() taken by the latest mark_idle/exec/block/unblock call
+  double _idle_elapsed;
+  double _exec_elapsed;
+  double _block_elapsed;
+
+public:
+  G1StringDedupStat();          // Zeroes all counters and timers
+
+  void inc_inspected() {
+    _inspected++;
+  }
+
+  void inc_skipped() {
+    _skipped++;
+  }
+
+  void inc_hashed() {
+    _hashed++;
+  }
+
+  void inc_known() {
+    _known++;
+  }
+
+  void inc_new(uintx bytes) {   // Counts one new string of the given size
+    _new++;
+    _new_bytes += bytes;
+  }
+
+  void inc_deduped_young(uintx bytes) { // Updates the overall and the young deduplication counters
+    _deduped++;
+    _deduped_bytes += bytes;
+    _deduped_young++;
+    _deduped_young_bytes += bytes;
+  }
+
+  void inc_deduped_old(uintx bytes) {   // Updates the overall and the old deduplication counters
+    _deduped++;
+    _deduped_bytes += bytes;
+    _deduped_old++;
+    _deduped_old_bytes += bytes;
+  }
+
+  void mark_idle() {            // Records the start of an idle phase
+    _start = os::elapsedTime();
+    _idle++;
+  }
+
+  void mark_exec() {            // Closes the idle phase and starts timing execution
+    double now = os::elapsedTime();
+    _idle_elapsed = now - _start; // NOTE(review): assigned, not accumulated like the exec/block timers - confirm this is intended
+    _start = now;
+    _exec++;
+  }
+
+  void mark_block() {           // Adds the time since _start to the exec timer and starts timing a block
+    double now = os::elapsedTime();
+    _exec_elapsed += now - _start;
+    _start = now;
+    _block++;
+  }
+
+  void mark_unblock() {         // Adds the time since _start to the block timer and resumes timing
+    double now = os::elapsedTime();
+    _block_elapsed += now - _start;
+    _start = now;
+  }
+
+  void mark_done() {            // Adds the time since _start to the exec timer; _start is not updated
+    double now = os::elapsedTime();
+    _exec_elapsed += now - _start;
+  }
+
+  void add(const G1StringDedupStat& stat); // Accumulates the counters and elapsed times of stat into this object
+
+  static void print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat);
+  static void print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total);
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp
@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
+#include "classfile/javaClasses.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "memory/gcLocker.hpp"
+#include "memory/padded.inline.hpp"
+#include "oops/typeArrayOop.hpp"
+#include "runtime/mutexLocker.hpp"
+
+//
+// Freelist in the deduplication table entry cache. Links table
+// entries together using their _next fields.
+//
+class G1StringDedupEntryFreeList : public CHeapObj<mtGC> {
+private:
+  G1StringDedupEntry* _list;   // Head of the singly linked list, NULL when empty
+  size_t              _length; // Number of entries currently linked
+
+public:
+  // Creates an empty freelist
+  G1StringDedupEntryFreeList() : _list(NULL), _length(0) {}
+
+  // Pushes entry onto the head of the list
+  void add(G1StringDedupEntry* entry) {
+    entry->set_next(_list);
+    _list = entry;
+    _length++;
+  }
+
+  // Pops the head of the list, returns NULL if the list is empty
+  G1StringDedupEntry* remove() {
+    G1StringDedupEntry* head = _list;
+    if (head == NULL) {
+      return NULL;
+    }
+    _list = head->next();
+    _length--;
+    return head;
+  }
+
+  // Returns the number of entries in the list
+  size_t length() { return _length; }
+};
+
+//
+// Cache of deduplication table entries. This cache provides fast allocation and
+// reuse of table entries to lower the pressure on the underlying allocator.
+// But more importantly, it provides fast/deferred freeing of table entries. This
+// is important because freeing of table entries is done during stop-the-world
+// phases and it is not uncommon for a large number of entries to be freed at once.
+// Table entries that are freed during these phases are placed onto a freelist in
+// the cache. The deduplication thread, which executes in a concurrent phase, will
+// later reuse or free the underlying memory for these entries.
+//
+// The cache allows for single-threaded allocations and multi-threaded frees.
+// Allocations are synchronized by StringDedupTable_lock as part of a table
+// modification.
+//
+class G1StringDedupEntryCache : public CHeapObj<mtGC> {
+private:
+  // One freelist per GC worker to allow lock less freeing of
+  // entries while doing a parallel scan of the table. Using
+  // PaddedEnd to avoid false sharing.
+  PaddedEnd<G1StringDedupEntryFreeList>* _lists;
+  size_t                                 _nlists; // Number of elements in _lists
+
+public:
+  G1StringDedupEntryCache();
+  ~G1StringDedupEntryCache(); // Must never run, the implementation is ShouldNotReachHere()
+
+  // Get a table entry from the cache freelist, or allocate a new
+  // entry if the cache is empty.
+  G1StringDedupEntry* alloc();
+
+  // Reset a table entry and insert it into the freelist of the given worker.
+  void free(G1StringDedupEntry* entry, uint worker_id);
+
+  // Returns current number of entries in the cache, summed over all freelists.
+  size_t size();
+
+  // If the cache has grown above the given max size, trim it down
+  // and deallocate the memory occupied by the trimmed-off entries.
+  void trim(size_t max_size);
+};
+
+G1StringDedupEntryCache::G1StringDedupEntryCache() :
+  _nlists(MAX2(ParallelGCThreads, (size_t)1)) { // One freelist per GC worker, but never fewer than one
+  _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
+}
+
+G1StringDedupEntryCache::~G1StringDedupEntryCache() { // The cache is never destroyed; its lists come from create_unfreeable()
+  ShouldNotReachHere();
+}
+
+G1StringDedupEntry* G1StringDedupEntryCache::alloc() {
+  // Reuse a cached entry from the first non-empty freelist, if there is one
+  G1StringDedupEntry* reused = NULL;
+  for (size_t list = 0; reused == NULL && list < _nlists; list++) {
+    reused = _lists[list].remove();
+  }
+  // Fall back to a fresh allocation when every freelist is empty
+  return (reused != NULL) ? reused : new G1StringDedupEntry();
+}
+
+void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) { // Resets entry and caches it on the worker's freelist
+  assert(entry->obj() != NULL, "Double free"); // A freed entry has its obj reset to NULL below
+  assert(worker_id < _nlists, "Invalid worker id");
+  entry->set_obj(NULL);
+  entry->set_hash(0);
+  _lists[worker_id].add(entry);
+}
+
+size_t G1StringDedupEntryCache::size() {
+  size_t total = 0; // Sum of the lengths of all per-worker freelists
+  for (size_t list = 0; list != _nlists; ++list) {
+    total += _lists[list].length();
+  }
+  return total;
+}
+
+void G1StringDedupEntryCache::trim(size_t max_size) {
+  size_t kept = 0; // Running count of entries that remain cached
+  for (size_t i = 0; i < _nlists; i++) {
+    G1StringDedupEntryFreeList& list = _lists[i];
+    kept += list.length();
+    // Deallocate entries from this list until the running count fits
+    for (; kept > max_size; kept--) {
+      G1StringDedupEntry* victim = list.remove();
+      assert(victim != NULL, "Should not be null");
+      delete victim;
+    }
+  }
+}
+
+G1StringDedupTable*      G1StringDedupTable::_table = NULL;       // Set by create(), replaced by finish_resize()/finish_rehash()
+G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL; // Set by create()
+
+const size_t             G1StringDedupTable::_min_size = (1 << 10);   // 1024
+const size_t             G1StringDedupTable::_max_size = (1 << 24);   // 16777216
+const double             G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load
+const double             G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load
+const double             G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size
+const uintx              G1StringDedupTable::_rehash_multiple = 60;   // Hash bucket has 60 times more collisions than expected
+const uintx              G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor); // Lookup count above which a rehash is requested
+
+uintx                    G1StringDedupTable::_entries_added = 0;   // Bumped by lookup_or_add_inner() on insert
+uintx                    G1StringDedupTable::_entries_removed = 0; // Bumped by unlink_or_oops_do()
+uintx                    G1StringDedupTable::_resize_count = 0;    // Bumped by prepare_resize()
+uintx                    G1StringDedupTable::_rehash_count = 0;    // Bumped by prepare_rehash()
+
+G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) : // Builds an empty table with size buckets
+  _size(size),
+  _entries(0),
+  _grow_threshold((uintx)(size * _grow_load_factor)),     // Entry count above which prepare_resize() grows the table
+  _shrink_threshold((uintx)(size * _shrink_load_factor)), // Entry count below which prepare_resize() shrinks the table
+  _rehash_needed(false),
+  _hash_seed(hash_seed) {
+  assert(is_power_of_2(size), "Table size must be a power of 2");
+  _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
+  memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*)); // Every bucket starts out zeroed
+}
+
+G1StringDedupTable::~G1StringDedupTable() { // Releases the bucket array only; entries are not freed here
+  FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets, mtGC);
+}
+
+void G1StringDedupTable::create() { // Creates the entry cache and the initial table of _min_size buckets
+  assert(_table == NULL, "One string deduplication table allowed");
+  _entry_cache = new G1StringDedupEntryCache();
+  _table = new G1StringDedupTable(_min_size);
+}
+
+void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) {
+  G1StringDedupEntry* fresh = _entry_cache->alloc(); // Cached entry if available, else newly allocated
+  fresh->set_obj(value);
+  fresh->set_hash(hash);
+  fresh->set_next(*list);                            // Link in front of the current list head
+  *list = fresh;
+  _entries++;
+}
+
+void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
+  G1StringDedupEntry* unlinked = *pentry;   // Entry to take out of its bucket list
+  *pentry = unlinked->next();               // Bypass it; _entries is not adjusted here
+  _entry_cache->free(unlinked, worker_id);  // Hand it to the worker's freelist
+}
+
+void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
+  // Unlink the entry from its current bucket list
+  G1StringDedupEntry* moved = *pentry;
+  *pentry = moved->next();
+  // Push it onto the bucket of dest that its stored hash maps to
+  G1StringDedupEntry** target = dest->bucket(dest->hash_to_index(moved->hash()));
+  moved->set_next(*target);
+  *target = moved;
+}
+
+bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
+  if (value1 == value2) {
+    return true; // Same array
+  }
+  const int length = value1->length(); // Otherwise equal only if lengths and jchar contents match
+  return length == value2->length() && memcmp(value1->base(T_CHAR), value2->base(T_CHAR), length * sizeof(jchar)) == 0;
+}
+
+typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash,
+                                        G1StringDedupEntry** list, uintx &count) {
+  // Walk the bucket list; count is bumped once for every entry that did not match
+  G1StringDedupEntry* cursor = *list;
+  while (cursor != NULL) {
+    if (cursor->hash() == hash) {
+      typeArrayOop candidate = cursor->obj();
+      if (equals(value, candidate)) {
+        return candidate; // Match found
+      }
+    }
+    count++;
+    cursor = cursor->next();
+  }
+  return NULL; // Not found
+}
+
+typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) {
+  // Locate the bucket list this hash maps to
+  G1StringDedupEntry** chain = bucket(hash_to_index(hash));
+
+  // Search the list; chain_length counts the entries passed over without a match
+  uintx chain_length = 0;
+  typeArrayOop found = lookup(value, hash, chain, chain_length);
+
+  // Check if rehash is needed
+  if (chain_length > _rehash_threshold) {
+    _rehash_needed = true;
+  }
+
+  if (found != NULL) {
+    // Already in the table
+    return found;
+  }
+
+  // Not found, add new entry and update statistics
+  add(value, hash, chain);
+  _entries_added++;
+  return NULL;
+}
+
+unsigned int G1StringDedupTable::hash_code(typeArrayOop value) {
+  // Hashes the jchar contents of the given array
+  int count = value->length();
+  const jchar* chars = (jchar*)value->base(T_CHAR);
+
+  if (!use_java_hash()) {
+    // Murmur3 hash, seeded with the hash seed of the installed table
+    return AltHashing::murmur3_32(_table->_hash_seed, chars, count);
+  }
+
+  // Hash as computed by java_lang_String
+  return java_lang_String::hash_code(chars, count);
+}
+
+void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) { // Tries to deduplicate one String, recording the outcome in stat
+  assert(java_lang_String::is_instance(java_string), "Must be a string");
+  No_Safepoint_Verifier nsv;
+
+  stat.inc_inspected();
+
+  typeArrayOop value = java_lang_String::value(java_string);
+  if (value == NULL) {
+    // String has no value
+    stat.inc_skipped();
+    return;
+  }
+
+  unsigned int hash = 0;
+
+  if (use_java_hash()) {
+    // Get hash code cached in the String (0 is treated as not cached below)
+    hash = java_lang_String::hash(java_string);
+  }
+
+  if (hash == 0) {
+    // Compute hash
+    hash = hash_code(value);
+    stat.inc_hashed();
+  }
+
+  if (use_java_hash() && hash != 0) {
+    // Store hash code in cache
+    java_lang_String::set_hash(java_string, hash);
+  }
+
+  typeArrayOop existing_value = lookup_or_add(value, hash);
+  if (existing_value == value) {
+    // Same value, already known
+    stat.inc_known();
+    return;
+  }
+
+  // Get size of value array. The array counts as new both when it was just
+  // added to the table and when it is deduplicated against an existing array.
+  uintx size_in_bytes = value->size() * HeapWordSize;
+  stat.inc_new(size_in_bytes);
+
+  if (existing_value != NULL) {
+    // Enqueue the reference to make sure it is kept alive. Concurrent mark might
+    // otherwise declare it dead if there are no other strong references to this object.
+    G1SATBCardTableModRefBS::enqueue(existing_value);
+
+    // Existing value found, deduplicate string
+    java_lang_String::set_value(java_string, existing_value);
+
+    if (G1CollectedHeap::heap()->is_in_young(value)) { // Classified by where the replaced array lives
+      stat.inc_deduped_young(size_in_bytes);
+    } else {
+      stat.inc_deduped_old(size_in_bytes);
+    }
+  }
+}
+
+G1StringDedupTable* G1StringDedupTable::prepare_resize() { // Returns a new empty table of the target size, or NULL when no resize is done
+  size_t size = _table->_size;
+
+  // Check if the hashtable needs to be resized
+  if (_table->_entries > _table->_grow_threshold) {
+    // Grow table, double the size
+    size *= 2;
+    if (size > _max_size) {
+      // Too big, don't resize
+      return NULL;
+    }
+  } else if (_table->_entries < _table->_shrink_threshold) {
+    // Shrink table, halve the size
+    size /= 2;
+    if (size < _min_size) {
+      // Too small, don't resize
+      return NULL;
+    }
+  } else if (StringDeduplicationResizeALot) {
+    // Force grow
+    size *= 2;
+    if (size > _max_size) {
+      // Too big, force shrink instead (a quarter of the doubled size is half the current size)
+      size /= 4;
+    }
+  } else {
+    // Resize not needed
+    return NULL;
+  }
+
+  // Update statistics
+  _resize_count++;
+
+  // Allocate the new table. The new table will be populated by workers
+  // calling unlink_or_oops_do() and finally installed by finish_resize().
+  return new G1StringDedupTable(size, _table->_hash_seed);
+}
+
+void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) { // Replaces the installed table with resized_table
+  assert(resized_table != NULL, "Invalid table");
+
+  resized_table->_entries = _table->_entries; // transfer() does not maintain _entries, carry the count over
+
+  // Free old table
+  delete _table;
+
+  // Install new table
+  _table = resized_table;
+}
+
+void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) {
+  // The table is divided into partitions to allow lock-less parallel processing by
+  // multiple worker threads. A worker thread first claims a partition, which ensures
+  // exclusive access to that part of the table, then continues to process it. To allow
+  // shrinking of the table in parallel we also need to make sure that the same worker
+  // thread processes all partitions where entries will hash to the same destination
+  // partition. Since the table size is always a power of two and we always shrink by
+  // dividing the table in half, we know that for a given partition there is only one
+  // other partition whose entries will hash to the same destination partition. That
+  // other partition is always the sibling partition in the second half of the table.
+  // For example, if the table is divided into 8 partitions, the sibling of partition 0
+  // is partition 4, the sibling of partition 1 is partition 5, etc.
+  size_t table_half = _table->_size / 2;
+
+  // Let each partition be one page worth of buckets
+  size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*));
+  assert(table_half % partition_size == 0, "Invalid partition size");
+
+  // Number of entries removed during the scan
+  uintx removed = 0;
+
+  for (;;) {
+    // Grab next partition to scan
+    size_t partition_begin = cl->claim_table_partition(partition_size);
+    size_t partition_end = partition_begin + partition_size;
+    if (partition_begin >= table_half) {
+      // End of table
+      break;
+    }
+
+    // Scan the partition followed by the sibling partition in the second half of the table
+    removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id);
+    removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id);
+  }
+
+  // Delayed update to avoid contention on the table lock
+  if (removed > 0) {
+    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+    _table->_entries -= removed;
+    _entries_removed += removed;
+  }
+}
+
+uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
+                                            size_t partition_begin,
+                                            size_t partition_end,
+                                            uint worker_id) { // Scans buckets [partition_begin, partition_end) and returns the number of entries removed
+  uintx removed = 0;
+  for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
+    G1StringDedupEntry** entry = _table->bucket(bucket);
+    while (*entry != NULL) {
+      oop* p = (oop*)(*entry)->obj_addr();
+      if (cl->is_alive(*p)) {
+        cl->keep_alive(p);
+        if (cl->is_resizing()) {
+          // We are resizing the table, transfer entry to the new table.
+          // transfer() unlinks the entry, so *entry already refers to the next one.
+          _table->transfer(entry, cl->resized_table());
+        } else {
+          if (cl->is_rehashing()) {
+            // We are rehashing the table, rehash the entry but keep it
+            // in the table. We can't transfer entries into the new table
+            // at this point since we don't have exclusive access to all
+            // destination partitions. finish_rehash() will do a single
+            // threaded transfer of all entries.
+            typeArrayOop value = (typeArrayOop)*p;
+            unsigned int hash = hash_code(value);
+            (*entry)->set_hash(hash);
+          }
+
+          // Move to next entry
+          entry = (*entry)->next_addr();
+        }
+      } else {
+        // Not alive, remove entry from table (remove() unlinks it, so *entry advances)
+        _table->remove(entry, worker_id);
+        removed++;
+      }
+    }
+  }
+
+  return removed;
+}
+
+G1StringDedupTable* G1StringDedupTable::prepare_rehash() { // Returns a new empty table to rehash into, or NULL when no rehash is done
+  if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
+    // Rehash not needed
+    return NULL;
+  }
+
+  // Update statistics
+  _rehash_count++;
+
+  // Compute new hash seed. It is stored in the current table, which is where hash_code() reads the seed from.
+  _table->_hash_seed = AltHashing::compute_seed();
+
+  // Allocate the new table, same size and hash seed
+  return new G1StringDedupTable(_table->_size, _table->_hash_seed);
+}
+
+// Moves every entry of the currently active table into rehashed_table,
+// copies the entry count, deletes the old table and makes rehashed_table
+// the active table.
+void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) {
+  assert(rehashed_table != NULL, "Invalid table");
+
+  G1StringDedupTable* const old_table = _table;
+
+  // Drain each bucket of the old table into the new table. The same slot is
+  // re-read after every transfer() until it is empty (this relies on
+  // transfer() unlinking the entry it moves -- see its definition).
+  for (size_t index = 0; index < old_table->_size; index++) {
+    for (G1StringDedupEntry** head = old_table->bucket(index); *head != NULL; ) {
+      old_table->transfer(head, rehashed_table);
+    }
+  }
+
+  rehashed_table->_entries = old_table->_entries;
+
+  // Free the old table before installing the new one
+  delete old_table;
+  _table = rehashed_table;
+}
+
+// Verifies the currently active table, bucket by bucket. Any violation
+// triggers a guarantee() failure. Two passes are made over each bucket:
+//  1. every entry must refer to a non-NULL, non-forwarded typeArrayOop in
+//     the reserved heap, and its stored hash must match the recomputed hash
+//     and map to the bucket it is found in;
+//  2. no two entries of the bucket may compare equal via equals().
+void G1StringDedupTable::verify() {
+  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
+    // Verify entries
+    G1StringDedupEntry** entry = _table->bucket(bucket);
+    while (*entry != NULL) {
+      typeArrayOop value = (*entry)->obj();
+      guarantee(value != NULL, "Object must not be NULL");
+      guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap");
+      guarantee(!value->is_forwarded(), "Object must not be forwarded");
+      guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
+      // Recompute the hash with the currently active hash function and seed
+      unsigned int hash = hash_code(value);
+      guarantee((*entry)->hash() == hash, "Table entry has incorrect hash");
+      guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
+      entry = (*entry)->next_addr();
+    }
+
+    // Verify that we do not have entries with identical oops or identical arrays.
+    // We only need to compare entries in the same bucket. If the same oop or an
+    // identical array has been inserted more than once into different/incorrect
+    // buckets the verification step above will catch that.
+    G1StringDedupEntry** entry1 = _table->bucket(bucket);
+    while (*entry1 != NULL) {
+      typeArrayOop value1 = (*entry1)->obj();
+      // Compare value1 against every entry that follows it in the chain
+      G1StringDedupEntry** entry2 = (*entry1)->next_addr();
+      while (*entry2 != NULL) {
+        typeArrayOop value2 = (*entry2)->obj();
+        guarantee(!equals(value1, value2), "Table entries must not have identical arrays");
+        entry2 = (*entry2)->next_addr();
+      }
+      entry1 = (*entry1)->next_addr();
+    }
+  }
+}
+
+// Trims the entry cache, while holding the table lock, to at most
+// (table size * _max_cache_factor) entries, truncated to size_t.
+void G1StringDedupTable::trim_entry_cache() {
+  MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+  const double cache_limit = _table->_size * _max_cache_factor;
+  _entry_cache->trim((size_t)cache_limit);
+}
+
+// Prints a multi-line "[Table]" statistics section to st: memory usage,
+// table size and its limits, entry counts and load, resize and rehash
+// counters with their thresholds, the hash seed and the age threshold.
+// The arguments below are listed one source line per output line, in the
+// same order as the format string.
+void G1StringDedupTable::print_statistics(outputStream* st) {
+  st->print_cr(
+    "   [Table]\n"
+    "      [Memory Usage: "G1_STRDEDUP_BYTES_FORMAT_NS"]\n"
+    "      [Size: "SIZE_FORMAT", Min: "SIZE_FORMAT", Max: "SIZE_FORMAT"]\n"
+    "      [Entries: "UINTX_FORMAT", Load: "G1_STRDEDUP_PERCENT_FORMAT_NS", Cached: " UINTX_FORMAT ", Added: "UINTX_FORMAT", Removed: "UINTX_FORMAT"]\n"
+    "      [Resize Count: "UINTX_FORMAT", Shrink Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS"), Grow Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS")]\n"
+    "      [Rehash Count: "UINTX_FORMAT", Rehash Threshold: "UINTX_FORMAT", Hash Seed: 0x%x]\n"
+    "      [Age Threshold: "UINTX_FORMAT"]",
+    // Memory usage: the bucket array plus all entries, both in the table and in the entry cache
+    G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)),
+    _table->_size, _min_size, _max_size,
+    // Load is the number of entries per bucket, expressed as a percentage
+    _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed,
+    _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0,
+    _rehash_count, _rehash_threshold, _table->_hash_seed,
+    StringDeduplicationAgeThreshold);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp
@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
+
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "runtime/mutexLocker.hpp"
+
+class G1StringDedupEntryCache;
+
+//
+// Table entry in the deduplication hashtable. Points weakly to the
+// character array. Can be chained in a linked list in case of hash
+// collisions or when placed in a freelist in the entry cache.
+//
+class G1StringDedupEntry : public CHeapObj<mtGC> {
+private:
+  G1StringDedupEntry* _next;  // Next entry in the same list, or NULL
+  unsigned int        _hash;  // Hash code stored for _obj
+  typeArrayOop        _obj;   // The character array this entry refers to
+
+public:
+  // Creates an unlinked entry with a zero hash and no character array.
+  G1StringDedupEntry() : _next(NULL), _hash(0), _obj(NULL) {}
+
+  // List link. next_addr() returns the address of the link field itself,
+  // which lets callers walk and modify a list through a pointer-to-pointer.
+  G1StringDedupEntry*  next()      { return _next; }
+  G1StringDedupEntry** next_addr() { return &_next; }
+  void set_next(G1StringDedupEntry* next) { _next = next; }
+
+  // Stored hash code.
+  unsigned int hash() { return _hash; }
+  void set_hash(unsigned int hash) { _hash = hash; }
+
+  // Referenced character array. obj_addr() returns the address of the
+  // reference field itself.
+  typeArrayOop  obj()      { return _obj; }
+  typeArrayOop* obj_addr() { return &_obj; }
+  void set_obj(typeArrayOop obj) { _obj = obj; }
+};
+
+//
+// The deduplication hashtable keeps track of all unique character arrays used
+// by String objects. Each table entry weakly points to a character array, allowing
+// otherwise unreachable character arrays to be declared dead and pruned from the
+// table.
+//
+// The table is dynamically resized to accommodate the current number of table entries.
+// The table has hash buckets with chains for hash collision. If the average chain
+// length goes above or below given thresholds the table grows or shrinks accordingly.
+//
+// The table is also dynamically rehashed (using a new hash seed) if it becomes severely
+// unbalanced, i.e., a hash chain is significantly longer than average.
+//
+// All access to the table is protected by the StringDedupTable_lock, except under
+// safepoints in which case GC workers are allowed to access the table partitions they
+// have claimed without first acquiring the lock. Note however, that this applies only
+// to the table partition (i.e. a range of elements in _buckets), not other parts of the
+// table such as the _entries field, statistics counters, etc.
+//
+class G1StringDedupTable : public CHeapObj<mtGC> {
+private:
+  // The currently active hashtable instance. Only modified when
+  // the table is resized or rehashed.
+  static G1StringDedupTable*      _table;
+
+  // Cache for reuse and fast alloc/free of table entries.
+  static G1StringDedupEntryCache* _entry_cache;
+
+  // Array of hash buckets, indexed via bucket().
+  G1StringDedupEntry**            _buckets;
+  // Number of buckets. hash_to_index() masks with (_size - 1), which
+  // presumably requires _size to be a power of two -- confirm in the .cpp.
+  size_t                          _size;
+  // Number of entries currently accounted to this table.
+  uintx                           _entries;
+  // Entry-count thresholds for shrinking/growing the table (reported by
+  // print_statistics()).
+  uintx                           _shrink_threshold;
+  uintx                           _grow_threshold;
+  // When set, prepare_rehash() will set up a rehash of the table.
+  bool                            _rehash_needed;
+
+  // The hash seed also dictates which hash function to use. A
+  // zero hash seed means we will use the Java compatible hash
+  // function (which doesn't use a seed), and a non-zero hash
+  // seed means we use the murmur3 hash function.
+  jint                            _hash_seed;
+
+  // Constants governing table resize/rehash/cache.
+  static const size_t             _min_size;
+  static const size_t             _max_size;
+  static const double             _grow_load_factor;
+  static const double             _shrink_load_factor;
+  static const uintx              _rehash_multiple;
+  static const uintx              _rehash_threshold;
+  static const double             _max_cache_factor;
+
+  // Table statistics, only used for logging.
+  static uintx                    _entries_added;
+  static uintx                    _entries_removed;
+  static uintx                    _resize_count;
+  static uintx                    _rehash_count;
+
+  // Construction and destruction are private; instances are created and
+  // deleted by the static functions of this class.
+  G1StringDedupTable(size_t size, jint hash_seed = 0);
+  ~G1StringDedupTable();
+
+  // Returns the hash bucket at the given index.
+  G1StringDedupEntry** bucket(size_t index) {
+    return _buckets + index;
+  }
+
+  // Returns the hash bucket index for the given hash code.
+  size_t hash_to_index(unsigned int hash) {
+    return (size_t)hash & (_size - 1);
+  }
+
+  // Adds a new table entry to the given hash bucket.
+  void add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list);
+
+  // Removes the given table entry from the table.
+  void remove(G1StringDedupEntry** pentry, uint worker_id);
+
+  // Transfers a table entry from the current table to the destination table.
+  void transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest);
+
+  // Returns an existing character array in the given hash bucket, or NULL
+  // if no matching character array exists.
+  typeArrayOop lookup(typeArrayOop value, unsigned int hash,
+                      G1StringDedupEntry** list, uintx &count);
+
+  // Returns an existing character array in the table, or inserts a new
+  // table entry if no matching character array exists.
+  typeArrayOop lookup_or_add_inner(typeArrayOop value, unsigned int hash);
+
+  // Thread safe lookup or add of table entry
+  static typeArrayOop lookup_or_add(typeArrayOop value, unsigned int hash) {
+    // Protect the table from concurrent access. Also note that this lock
+    // acts as a fence for _table, which could have been replaced by a new
+    // instance if the table was resized or rehashed.
+    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
+    return _table->lookup_or_add_inner(value, hash);
+  }
+
+  // Returns true if the hashtable is currently using a Java compatible
+  // hash function.
+  static bool use_java_hash() {
+    return _table->_hash_seed == 0;
+  }
+
+  // Compares two character arrays. Used by verify() to detect duplicate
+  // table entries; the exact comparison is defined in the .cpp file.
+  static bool equals(typeArrayOop value1, typeArrayOop value2);
+
+  // Computes the hash code for the given character array, using the
+  // currently active hash function and hash seed.
+  static unsigned int hash_code(typeArrayOop value);
+
+  // Processes the buckets in [partition_begin, partition_end) of the active
+  // table and returns the number of entries removed from it.
+  static uintx unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
+                                 size_t partition_begin,
+                                 size_t partition_end,
+                                 uint worker_id);
+
+public:
+  static void create();
+
+  // Deduplicates the given String object, or adds its backing
+  // character array to the deduplication hashtable.
+  static void deduplicate(oop java_string, G1StringDedupStat& stat);
+
+  // If a table resize is needed, returns a newly allocated empty
+  // hashtable of the proper size.
+  static G1StringDedupTable* prepare_resize();
+
+  // Installs a newly resized table as the currently active table
+  // and deletes the previously active table.
+  static void finish_resize(G1StringDedupTable* resized_table);
+
+  // If a table rehash is needed, returns a newly allocated empty
+  // hashtable and updates the hash seed.
+  static G1StringDedupTable* prepare_rehash();
+
+  // Transfers rehashed entries from the currently active table into
+  // the new table. Installs the new table as the currently active table
+  // and deletes the previously active table.
+  static void finish_rehash(G1StringDedupTable* rehashed_table);
+
+  // If the table entry cache has grown too large, trim it down according to policy
+  static void trim_entry_cache();
+
+  // Entry point used by a GC worker (identified by worker_id) to process
+  // table partitions with the given closure.
+  static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id);
+
+  // Prints table statistics to the given stream.
+  static void print_statistics(outputStream* st);
+  // Checks the consistency of the active table; failures trigger guarantee().
+  static void verify();
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp
@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
+#include "gc_implementation/g1/g1StringDedup.hpp"
+#include "gc_implementation/g1/g1StringDedupTable.hpp"
+#include "gc_implementation/g1/g1StringDedupThread.hpp"
+#include "gc_implementation/g1/g1StringDedupQueue.hpp"
+
+// The single deduplication thread instance. NULL until create() is called.
+G1StringDedupThread* G1StringDedupThread::_thread = NULL;
+
+// Names the thread, then creates and starts it.
+G1StringDedupThread::G1StringDedupThread() : ConcurrentGCThread() {
+  set_name("String Deduplication Thread");
+  create_and_start();
+}
+
+// The deduplication thread is never expected to be destroyed; reaching the
+// destructor is reported via ShouldNotReachHere().
+G1StringDedupThread::~G1StringDedupThread() {
+  ShouldNotReachHere();
+}
+
+// Creates the one and only deduplication thread. Asserts that string
+// deduplication is enabled and that no thread has been created before.
+void G1StringDedupThread::create() {
+  assert(G1StringDedup::is_enabled(), "String deduplication not enabled");
+  assert(_thread == NULL, "One string deduplication thread allowed");
+  _thread = new G1StringDedupThread();
+}
+
+// Returns the deduplication thread. Asserts that string deduplication is
+// enabled and that create() has already been called.
+G1StringDedupThread* G1StringDedupThread::thread() {
+  assert(G1StringDedup::is_enabled(), "String deduplication not enabled");
+  assert(_thread != NULL, "String deduplication thread not created");
+  return _thread;
+}
+
+// Prints the quoted thread name followed by the generic Thread description
+// and a line break.
+void G1StringDedupThread::print_on(outputStream* st) const {
+  st->print("\"%s\" ", name());
+  Thread::print_on(st);
+  st->cr();
+}
+
+// Thread entry point. After initialization it loops forever: wait for the
+// deduplication queue to become non-empty, drain the queue by deduplicating
+// each popped String, trim the table's entry cache, then accumulate and
+// print statistics. The loop has no exit, so this function never returns.
+void G1StringDedupThread::run() {
+  // Statistics accumulated over all iterations of the main loop
+  G1StringDedupStat total_stat;
+
+  initialize_in_thread();
+  wait_for_universe_init();
+
+  // Main loop
+  for (;;) {
+    // Statistics for this iteration only
+    G1StringDedupStat stat;
+
+    stat.mark_idle();
+
+    // Wait for the queue to become non-empty
+    G1StringDedupQueue::wait();
+
+    // Include this thread in safepoints
+    stsJoin();
+
+    stat.mark_exec();
+
+    // Process the queue
+    for (;;) {
+      oop java_string = G1StringDedupQueue::pop();
+      if (java_string == NULL) {
+        // Nothing more to pop, this round is done
+        break;
+      }
+
+      G1StringDedupTable::deduplicate(java_string, stat);
+
+      // Safepoint this thread if needed
+      if (stsShouldYield()) {
+        // Record the time spent blocked in the statistics
+        stat.mark_block();
+        stsYield(NULL);
+        stat.mark_unblock();
+      }
+    }
+
+    G1StringDedupTable::trim_entry_cache();
+
+    stat.mark_done();
+
+    // Print statistics
+    total_stat.add(stat);
+    print(gclog_or_tty, stat, total_stat);
+
+    // Exclude this thread from safepoints
+    stsLeave();
+  }
+
+  ShouldNotReachHere();
+}
+
+// Prints deduplication statistics to st. The summary is printed when fine
+// G1 logging or PrintStringDeduplicationStatistics is enabled; the detailed
+// last/total, table and queue statistics are printed only when
+// PrintStringDeduplicationStatistics is enabled.
+void G1StringDedupThread::print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) {
+  if (!G1Log::fine() && !PrintStringDeduplicationStatistics) {
+    // Neither form of output was requested
+    return;
+  }
+
+  G1StringDedupStat::print_summary(st, last_stat, total_stat);
+
+  if (!PrintStringDeduplicationStatistics) {
+    // Summary only
+    return;
+  }
+
+  G1StringDedupStat::print_statistics(st, last_stat, false);
+  G1StringDedupStat::print_statistics(st, total_stat, true);
+  G1StringDedupTable::print_statistics(st);
+  G1StringDedupQueue::print_statistics(st);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedupThread.hpp
@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
+
+#include "gc_implementation/g1/g1StringDedupStat.hpp"
+#include "gc_implementation/shared/concurrentGCThread.hpp"
+
+//
+// The deduplication thread is where the actual deduplication occurs. It waits for
+// deduplication candidates to appear on the deduplication queue, removes them from
+// the queue and tries to deduplicate them. It uses the deduplication hashtable to
+// find identical, already existing, character arrays on the heap. The thread runs
+// concurrently with the Java application but participates in safepoints to allow
+// the GC to adjust and unlink oops from the deduplication queue and table.
+//
+class G1StringDedupThread: public ConcurrentGCThread {
+private:
+  // The single thread instance, set by create().
+  static G1StringDedupThread* _thread;
+
+  // Private: the thread is only created through create(), and the
+  // destructor is not expected to ever run.
+  G1StringDedupThread();
+  ~G1StringDedupThread();
+
+  // Prints statistics for the last round (last_stat) and the accumulated
+  // totals (total_stat) to st, depending on the logging flags.
+  void print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat);
+
+public:
+  // Creates the deduplication thread. Must be called at most once.
+  static void create();
+  // Returns the thread created by create().
+  static G1StringDedupThread* thread();
+
+  // Main loop of the thread; does not return.
+  virtual void run();
+  // Prints the thread's name and generic thread information to st.
+  virtual void print_on(outputStream* st) const;
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -285,6 +285,10 @@
  product(uintx, G1MixedGCCountTarget, 8,                                   \
          "The target number of mixed GCs after a marking cycle.")          \
                                                                            \
+  experimental(uintx, G1CodeRootsChunkCacheKeepPercent, 10,                 \
+          "The amount of code root chunks that should be kept at most "     \
+          "as percentage of already allocated.")                            \
+                                                                            \
  experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
          "An upper bound for the number of old CSet regions expressed "    \
          "as a percentage of the heap size.")                              \
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
@ -43,8 +43,6 @@ class G1ParCopyClosure;
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;

-typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class G1CMOopClosure;
@ -61,7 +59,6 @@ class G1UpdateRSOrPushRefOopClosure;
 #endif

 #define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
-      f(G1ParScanHeapEvacClosure,_nv)                   \
      f(G1ParScanClosure,_nv)                           \
      f(G1ParPushHeapRSClosure,_nv)                     \
      f(FilterIntoCSClosure,_nv)                        \
--- a/Show More
+++ b/Show More