diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index 94a4771719c..001f94e161e 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -460,6 +460,84 @@ bool ConcurrentMark::not_yet_marked(oop obj) const {
          && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 }
 
+CMRootRegions::CMRootRegions() :
+  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
+  _should_abort(false),  _next_survivor(NULL) { }
+
+void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
+  _young_list = g1h->young_list();
+  _cm = cm;
+}
+
+void CMRootRegions::prepare_for_scan() {
+  assert(!scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  assert(_next_survivor == NULL, "pre-condition");
+  _next_survivor = _young_list->first_survivor_region();
+  _scan_in_progress = (_next_survivor != NULL);
+  _should_abort = false;
+}
+
+HeapRegion* CMRootRegions::claim_next() {
+  if (_should_abort) {
+    // If someone has set the should_abort flag, we return NULL to
+    // force the caller to bail out of their loop.
+    return NULL;
+  }
+
+  // Currently, only survivors can be root regions.
+  HeapRegion* res = _next_survivor;
+  if (res != NULL) {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    // Read it again in case it changed while we were waiting for the lock.
+    res = _next_survivor;
+    if (res != NULL) {
+      if (res == _young_list->last_survivor_region()) {
+        // We just claimed the last survivor so store NULL to indicate
+        // that we're done.
+        _next_survivor = NULL;
+      } else {
+        _next_survivor = res->get_next_young_region();
+      }
+    } else {
+      // Someone else claimed the last survivor while we were trying
+      // to take the lock so nothing else to do.
+    }
+  }
+  assert(res == NULL || res->is_survivor(), "post-condition");
+
+  return res;
+}
+
+void CMRootRegions::scan_finished() {
+  assert(scan_in_progress(), "pre-condition");
+
+  // Currently, only survivors can be root regions.
+  if (!_should_abort) {
+    assert(_next_survivor == NULL, "we should have claimed all survivors");
+  }
+  _next_survivor = NULL;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    _scan_in_progress = false;
+    RootRegionScan_lock->notify_all();
+  }
+}
+
+bool CMRootRegions::wait_until_scan_finished() {
+  if (!scan_in_progress()) return false;
+
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    while (scan_in_progress()) {
+      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+  return true;
+}
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
@@ -548,6 +626,8 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
+  _root_regions.init(_g1h, this);
+
   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 
@@ -864,6 +944,8 @@ void ConcurrentMark::checkpointRootsInitialPost() {
   satb_mq_set.set_active_all_threads(true, /* new active value */
                                      false /* expected_active */);
 
+  _root_regions.prepare_for_scan();
+
   // update_g1_committed() will be called at the end of an evac pause
   // when marking is on. So, it's also called at the end of the
   // initial-mark pause to update the heap end, if the heap expands
@@ -1157,6 +1239,69 @@ uint ConcurrentMark::calc_parallel_marking_threads() {
   return 0;
 }
 
+void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+  // Currently, only survivors can be root regions.
+  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
+  G1RootRegionScanClosure cl(_g1h, this, worker_id);
+
+  const uintx interval = PrefetchScanIntervalInBytes;
+  HeapWord* curr = hr->bottom();
+  const HeapWord* end = hr->top();
+  while (curr < end) {
+    Prefetch::read(curr, interval);
+    oop obj = oop(curr);
+    int size = obj->oop_iterate(&cl);
+    assert(size == obj->size(), "sanity");
+    curr += size;
+  }
+}
+
+class CMRootRegionScanTask : public AbstractGangTask {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  CMRootRegionScanTask(ConcurrentMark* cm) :
+    AbstractGangTask("Root Region Scan"), _cm(cm) { }
+
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+
+    CMRootRegions* root_regions = _cm->root_regions();
+    HeapRegion* hr = root_regions->claim_next();
+    while (hr != NULL) {
+      _cm->scanRootRegion(hr, worker_id);
+      hr = root_regions->claim_next();
+    }
+  }
+};
+
+void ConcurrentMark::scanRootRegions() {
+  // scan_in_progress() will have been set to true only if there was
+  // at least one root region to scan. So, if it's false, we
+  // should not attempt to do any further work.
+  if (root_regions()->scan_in_progress()) {
+    _parallel_marking_threads = calc_parallel_marking_threads();
+    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+           "Maximum number of marking threads exceeded");
+    uint active_workers = MAX2(1U, parallel_marking_threads());
+
+    CMRootRegionScanTask task(this);
+    if (parallel_marking_threads() > 0) {
+      _parallel_workers->set_active_workers((int) active_workers);
+      _parallel_workers->run_task(&task);
+    } else {
+      task.work(0);
+    }
+
+    // It's possible that has_aborted() is true here without actually
+    // aborting the survivor scan earlier. This is OK as it's
+    // mainly used for sanity checking.
+    root_regions()->scan_finished();
+  }
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
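Illustrative note (not part of the patch): the claim/abort/wait handshake that CMRootRegions implements is easier to see in isolation. Below is a minimal, self-contained sketch of the same protocol using standard C++ primitives instead of HotSpot's Monitor/MutexLockerEx; the names (RootRegionQueue, etc.) are invented for the example, and the sketch takes the lock on every operation where the real code reads the flags lock-free.

#include <condition_variable>
#include <mutex>
#include <vector>

class RootRegionQueue {
  std::mutex              _lock;
  std::condition_variable _done_cv;
  std::vector<int>        _regions;            // stand-in for survivor regions
  size_t                  _next = 0;
  bool                    _scan_in_progress = false;
  bool                    _should_abort = false;

public:
  void prepare_for_scan(std::vector<int> regions) {
    std::lock_guard<std::mutex> g(_lock);
    _regions = std::move(regions);
    _next = 0;
    _scan_in_progress = !_regions.empty();
    _should_abort = false;
  }

  // Workers call this in a loop; returns false when everything has been
  // claimed or an abort was requested (mirrors claim_next()).
  bool claim_next(int* region) {
    std::lock_guard<std::mutex> g(_lock);
    if (_should_abort || _next == _regions.size()) return false;
    *region = _regions[_next++];
    return true;
  }

  void abort() {
    std::lock_guard<std::mutex> g(_lock);
    _should_abort = true;
  }

  // Called once by the coordinator after the workers have drained the queue
  // (mirrors scan_finished()): clears the flag and wakes up any waiters.
  void scan_finished() {
    std::lock_guard<std::mutex> g(_lock);
    _scan_in_progress = false;
    _done_cv.notify_all();
  }

  // Returns true if the caller actually had to wait (mirrors
  // wait_until_scan_finished(), which the evacuation pause calls).
  bool wait_until_scan_finished() {
    std::unique_lock<std::mutex> g(_lock);
    if (!_scan_in_progress) return false;
    _done_cv.wait(g, [this] { return !_scan_in_progress; });
    return true;
  }
};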
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index fc313869fd2..e8795d6ac39 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -349,10 +349,62 @@ typedef enum {
   high_verbose    // per object verbose
 } CMVerboseLevel;
 
+class YoungList;
+
+// Root Regions are regions that are not empty at the beginning of a
+// marking cycle and which we might collect during an evacuation pause
+// while the cycle is active. Given that, during evacuation pauses, we
+// do not copy objects that are explicitly marked, what we have to do
+// for the root regions is to scan them and mark all objects reachable
+// from them. According to the SATB assumptions, we only need to visit
+// each object once during marking. So, as long as we finish this scan
+// before the next evacuation pause, we can copy the objects from the
+// root regions without having to mark them or do anything else to them.
+//
+// Currently, we only support root region scanning once (at the start
+// of the marking cycle) and the root regions are all the survivor
+// regions populated during the initial-mark pause.
+class CMRootRegions VALUE_OBJ_CLASS_SPEC {
+private:
+  YoungList*           _young_list;
+  ConcurrentMark*      _cm;
+
+  volatile bool        _scan_in_progress;
+  volatile bool        _should_abort;
+  HeapRegion* volatile _next_survivor;
+
+public:
+  CMRootRegions();
+  // We actually do most of the initialization in this method.
+  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
+
+  // Reset the claiming / scanning of the root regions.
+  void prepare_for_scan();
+
+  // Forces claim_next() to return NULL so that the iteration aborts early.
+  void abort() { _should_abort = true; }
+
+  // Return true if the CM threads are actively scanning root regions,
+  // false otherwise.
+  bool scan_in_progress() { return _scan_in_progress; }
+
+  // Claim the next root region to scan atomically, or return NULL if
+  // all have been claimed.
+  HeapRegion* claim_next();
+
+  // Flag that we're done with root region scanning and notify anyone
+  // who's waiting on it. If aborted is false, assume that all regions
+  // have been claimed.
+  void scan_finished();
+
+  // If CM threads are still scanning root regions, wait until they
+  // are done. Return true if we had to wait, false otherwise.
+  bool wait_until_scan_finished();
+};
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark: public CHeapObj {
+class ConcurrentMark : public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -400,6 +452,9 @@ protected:
   HeapWord*               _heap_start;
   HeapWord*               _heap_end;
 
+  // Root region tracking and claiming.
+  CMRootRegions           _root_regions;
+
   // For gray objects
   CMMarkStack             _markStack; // Grey objects behind global finger.
   CMRegionStack           _regionStack; // Grey regions behind global finger.
@@ -553,9 +608,9 @@ protected:
   bool has_overflown()           { return _has_overflown; }
   void set_has_overflown()       { _has_overflown = true; }
   void clear_has_overflown()     { _has_overflown = false; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   bool has_aborted()             { return _has_aborted; }
-  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   // Methods to enter the two overflow sync barriers
   void enter_first_sync_barrier(int task_num);
@@ -691,6 +746,8 @@ public:
   // Returns true if there are any aborted memory regions.
   bool has_aborted_regions();
 
+  CMRootRegions* root_regions() { return &_root_regions; }
+
   bool concurrent_marking_in_progress() {
     return _concurrent_marking_in_progress;
   }
@@ -741,8 +798,17 @@ public:
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe.
-  inline void grayRoot(oop obj, size_t word_size, uint worker_id);
+  // grayed. It is MT-safe. word_size is the size of the object in
+  // words. It is passed explicitly as sometimes we cannot calculate
+  // it from the given object because it might be in an inconsistent
+  // state (e.g., in to-space and being copied). So the caller is
+  // responsible for dealing with this issue (e.g., get the size from
+  // the from-space image when the to-space image might be
+  // inconsistent) and always passing the size. hr is the region that
+  // contains the object and it's passed optionally from callers who
+  // might already have it (no point in recalculating it).
+  inline void grayRoot(oop obj, size_t word_size,
+                       uint worker_id, HeapRegion* hr = NULL);
 
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
@@ -793,6 +859,13 @@ public:
   void checkpointRootsInitialPre();
   void checkpointRootsInitialPost();
 
+  // Scan all the root regions and mark everything reachable from
+  // them.
+  void scanRootRegions();
+
+  // Scan a single root region and mark everything reachable from it.
+  void scanRootRegion(HeapRegion* hr, uint worker_id);
+
   // Do concurrent phase of marking, to a tentative transitive closure.
   void markFromRoots();
 
@@ -972,6 +1045,10 @@ public:
                            size_t* marked_bytes_array,
                            BitMap* task_card_bm);
 
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
+
   // Counts the given memory region in the task/worker counting
   // data structures for the given worker id.
   inline void count_region(MemRegion mr, uint worker_id);
@@ -992,6 +1069,12 @@ public:
                          size_t* marked_bytes_array,
                          BitMap* task_card_bm);
 
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, size_t word_size,
+                                 HeapRegion* hr, uint worker_id);
+
   // Attempts to mark the given object and, if successful, counts
   // the object in the task/worker counting structures for the
   // given worker id.
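Illustrative note (not part of the patch): the grayRoot() contract above (explicit word_size, optional region, marking only below NTAMS, marking at most once) boils down to a small decision. The sketch below restates that decision with plain word indices standing in for heap addresses; the names and types are invented for the example, not HotSpot's.

#include <cstddef>
#include <vector>

// Hypothetical, simplified stand-ins: an "address" is a word index into the
// region, and the "next" mark bitmap has one bit per word.
struct RegionView {
  std::size_t ntams_index;        // next-top-at-mark-start, in words
  std::vector<bool> next_bitmap;  // one bit per word in the region
};

// Mirrors the check grayRoot() performs: objects at or above NTAMS are
// implicitly live for this marking cycle, so only objects below NTAMS are
// explicitly marked (and each is marked at most once).
bool gray_root(RegionView& r, std::size_t obj_index) {
  if (obj_index >= r.ntams_index) return false;  // implicitly live
  if (r.next_bitmap[obj_index])   return false;  // already marked
  r.next_bitmap[obj_index] = true;               // mark (G1 also counts it)
  return true;
}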
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
index 84a0af566be..27c3411051d 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
@@ -81,14 +81,22 @@ inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
   }
 }
 
+// Counts the given memory region in the task/worker counting
+// data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
 // Counts the given memory region, which may be a single object, in the
 // task/worker counting data structures for the given worker id.
 inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
-  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
-  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
   HeapWord* addr = mr.start();
   HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
-  count_region(mr, hr, marked_bytes_array, task_card_bm);
+  count_region(mr, hr, worker_id);
 }
 
 // Counts the given object in the given task/worker counting data structures.
@@ -102,7 +110,9 @@ inline void ConcurrentMark::count_object(oop obj,
 
 // Counts the given object in the task/worker counting data
 // structures for the given worker id.
-inline void ConcurrentMark::count_object(oop obj, HeapRegion* hr, uint worker_id) {
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
   size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
   BitMap* task_card_bm = count_card_bitmap_for(worker_id);
   HeapWord* addr = (HeapWord*) obj;
@@ -124,6 +134,22 @@ inline bool ConcurrentMark::par_mark_and_count(oop obj,
   return false;
 }
 
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    MemRegion mr(addr, word_size);
+    count_region(mr, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
 // Attempts to mark the given object and, if successful, counts
 // the object in the task/worker counting structures for the
 // given worker id.
@@ -342,20 +368,20 @@ inline void ConcurrentMark::markPrev(oop p) {
   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
 }
 
-inline void ConcurrentMark::grayRoot(oop obj, size_t word_size, uint worker_id) {
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
+                                     uint worker_id, HeapRegion* hr) {
+  assert(obj != NULL, "pre-condition");
   HeapWord* addr = (HeapWord*) obj;
-
-  // Currently we don't do anything with word_size but we will use it
-  // in the very near future in the liveness calculation piggy-backing
-  // changes.
-
-#ifdef ASSERT
-  HeapRegion* hr = _g1h->heap_region_containing(addr);
+  if (hr == NULL) {
+    hr = _g1h->heap_region_containing_raw(addr);
+  } else {
+    assert(hr->is_in(addr), "pre-condition");
+  }
   assert(hr != NULL, "sanity");
-  assert(!hr->is_survivor(), "should not allocate survivors during IM");
-  assert(addr < hr->next_top_at_mark_start(),
-         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
-                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+  // Given that we're looking for a region that contains an object
+  // header it's impossible to get back a HC region.
+  assert(!hr->continuesHumongous(), "sanity");
+
   // We cannot assert that word_size == obj->size() given that obj
   // might not be in a consistent state (another thread might be in
   // the process of copying it). So the best thing we can do is to
@@ -365,10 +391,11 @@ inline void ConcurrentMark::grayRoot(oop obj, size_t word_size, uint worker_id)
          err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
                  word_size * HeapWordSize, hr->capacity(),
                  HR_FORMAT_PARAMS(hr)));
-#endif // ASSERT
 
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    par_mark_and_count(obj, word_size, worker_id);
+  if (addr < hr->next_top_at_mark_start()) {
+    if (!_nextMarkBitMap->isMarked(addr)) {
+      par_mark_and_count(obj, word_size, hr, worker_id);
+    }
   }
 }
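Illustrative note (not part of the patch): the counting helpers above all funnel into the same per-worker structures, a marked-bytes array indexed by region plus a card bitmap. A minimal sketch of that idea, with hypothetical fixed sizes in place of G1's real heap and region geometry:

#include <cstddef>
#include <vector>

// Hypothetical sizes; G1 derives these from the heap layout.
const std::size_t kNumRegions   = 1024;
const std::size_t kCardsPerHeap = 1u << 20;

// One instance per worker, so counting needs no synchronization; the
// per-worker totals are combined later at a synchronized point.
struct WorkerCounts {
  std::vector<std::size_t> marked_bytes_per_region;
  std::vector<bool>        card_bitmap;

  WorkerCounts()
    : marked_bytes_per_region(kNumRegions, 0),
      card_bitmap(kCardsPerHeap, false) { }

  // Analogue of count_region(): attribute the marked bytes to their region
  // and flag every card the marked range spans.
  void count_range(std::size_t region_index, std::size_t bytes,
                   std::size_t first_card, std::size_t last_card) {
    marked_bytes_per_region[region_index] += bytes;
    for (std::size_t c = first_card; c <= last_card; ++c) {
      card_bitmap[c] = true;
    }
  }
};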
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
index bf1cd89e85e..4dce3689175 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
@@ -92,9 +92,36 @@ void ConcurrentMarkThread::run() {
       ResourceMark rm;
       HandleMark hm;
       double cycle_start = os::elapsedVTime();
-      double mark_start_sec = os::elapsedTime();
       char verbose_str[128];
 
+      // We have to ensure that we finish scanning the root regions
+      // before the next GC takes place. To ensure this we have to
+      // make sure that we do not join the STS until the root regions
+      // have been scanned. If we did then it's possible that a
+      // subsequent GC could block us from joining the STS and proceed
+      // without the root regions having been scanned which would be a
+      // correctness issue.
+
+      double scan_start = os::elapsedTime();
+      if (!cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
+        }
+
+        _cm->scanRootRegions();
+
+        double scan_end = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
+                                 scan_end - scan_start);
+        }
+      }
+
+      double mark_start_sec = os::elapsedTime();
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
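Illustrative note (not part of the patch): the new root-region-scan phase follows the same shape as the other concurrent phases here: skip if aborted, log a start line, do the work, log the elapsed time. A generic sketch of that shape, with std::chrono and printf standing in for os::elapsedTime() and gclog_or_tty (names are invented for the example):

#include <chrono>
#include <cstdio>
#include <functional>

// Runs one concurrent phase and prints start/end lines, mirroring the
// [GC concurrent-root-region-scan-start/-end] logging added above.
void run_logged_phase(const char* name, bool aborted,
                      const std::function<void()>& body) {
  if (aborted) return;                      // skip the phase entirely
  std::printf("[GC concurrent-%s-start]\n", name);
  auto t0 = std::chrono::steady_clock::now();
  body();                                   // e.g. the root region scan
  std::chrono::duration<double> secs = std::chrono::steady_clock::now() - t0;
  std::printf("[GC concurrent-%s-end, %1.7lf]\n", name, secs.count());
}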
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index dfda7ab2ed5..35470ea85d1 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -174,13 +174,10 @@ public:
   }
 };
 
-YoungList::YoungList(G1CollectedHeap* g1h)
-  : _g1h(g1h), _head(NULL),
-    _length(0),
-    _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
-{
-  guarantee( check_list_empty(false), "just making sure..." );
+YoungList::YoungList(G1CollectedHeap* g1h) :
+    _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
+  guarantee(check_list_empty(false), "just making sure...");
 }
 
 void YoungList::push_region(HeapRegion *hr) {
@@ -1270,7 +1267,18 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
     double start = os::elapsedTime();
     g1_policy()->record_full_collection_start();
 
+    // Note: When we have a more flexible GC logging framework that
+    // allows us to add optional attributes to a GC log record we
+    // could consider timing and reporting how long we wait in the
+    // following two methods.
     wait_while_free_regions_coming();
+    // If we start the compaction before the CM threads finish
+    // scanning the root regions we might trip them over as we'll
+    // be moving objects / updating references. So let's wait until
+    // they are done. By telling them to abort, they should complete
+    // early.
+    _cm->root_regions()->abort();
+    _cm->root_regions()->wait_until_scan_finished();
     append_secondary_free_list_if_not_empty_with_lock();
 
     gc_prologue(true);
@@ -1299,7 +1307,8 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
     ref_processor_cm()->verify_no_references_recorded();
 
     // Abandon current iterations of concurrent marking and concurrent
-    // refinement, if any are in progress.
+    // refinement, if any are in progress. We have to do this before
+    // wait_until_scan_finished() below.
     concurrent_mark()->abort();
 
     // Make sure we'll choose a new allocation region afterwards.
@@ -3675,6 +3684,18 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
       g1_policy()->record_collection_pause_start(start_time_sec,
                                                  start_used_bytes);
 
+      double scan_wait_start = os::elapsedTime();
+      // We have to wait until the CM threads finish scanning the
+      // root regions as it's the only way to ensure that all the
+      // objects on them have been correctly scanned before we start
+      // moving them during the GC.
+      bool waited = _cm->root_regions()->wait_until_scan_finished();
+      if (waited) {
+        double scan_wait_end = os::elapsedTime();
+        double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
+        g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+      }
+
 #if YOUNG_LIST_VERBOSE
       gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
       _young_list->print();
@@ -3784,6 +3805,9 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
       }
 
       if (g1_policy()->during_initial_mark_pause()) {
+        // We have to do this before we notify the CM threads that
+        // they can start working to make sure that all the
+        // appropriate initialization is done on the CM object.
        concurrent_mark()->checkpointRootsInitialPost();
        set_marking_started();
        // Note that we don't actually trigger the CM thread at
@@ -5773,8 +5797,9 @@ void G1CollectedHeap::set_free_regions_coming() {
 }
 
 void G1CollectedHeap::reset_free_regions_coming() {
+  assert(free_regions_coming(), "pre-condition");
+
   {
-    assert(free_regions_coming(), "pre-condition");
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
     _free_regions_coming = false;
     SecondaryFreeList_lock->notify_all();
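Illustrative note (not part of the patch): the pause-side bookkeeping above only charges a "root region scan wait" time to the pause when the pause actually had to block. A small self-contained sketch of that pattern, with std::chrono and trivial stubs standing in for os::elapsedTime(), the policy hook, and the handshake (all names invented for the example):

#include <chrono>
#include <cstdio>

// Stand-ins for the real hooks (illustrative only).
static bool wait_until_root_region_scan_finished() { return true; }
static void record_root_region_scan_wait_time(double ms) {
  std::printf("root region scan wait: %.3f ms\n", ms);
}

// Mirrors the logic added to do_collection_pause_at_safepoint(): time the
// wait, but record it only when the pause really waited.
void wait_for_root_region_scan() {
  auto wait_start = std::chrono::steady_clock::now();
  if (wait_until_root_region_scan_finished()) {
    std::chrono::duration<double, std::milli> waited =
        std::chrono::steady_clock::now() - wait_start;
    record_root_region_scan_wait_time(waited.count());
  }
}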
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
index 89fa51be308..ffbf25c221c 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@@ -141,6 +141,7 @@ G1CollectorPolicy::G1CollectorPolicy() :
 
   _cur_clear_ct_time_ms(0.0),
   _mark_closure_time_ms(0.0),
+  _root_region_scan_wait_time_ms(0.0),
 
   _cur_ref_proc_time_ms(0.0),
   _cur_ref_enq_time_ms(0.0),
@@ -903,19 +904,10 @@ void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  if (!during_initial_mark_pause()) {
-    // We only need to do this here as the policy will only be applied
-    // to the GC we're about to start. so, no point is calculating this
-    // every time we calculate / recalculate the target young length.
-    update_survivors_policy();
-  } else {
-    // The marking phase has a "we only copy implicitly live
-    // objects during marking" invariant. The easiest way to ensure it
-    // holds is not to allocate any survivor regions and tenure all
-    // objects. In the future we might change this and handle survivor
-    // regions specially during marking.
-    tenure_all_objects();
-  }
+  // We only need to do this here as the policy will only be applied
+  // to the GC we're about to start. So, no point in calculating this
+  // every time we calculate / recalculate the target young length.
+  update_survivors_policy();
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -967,6 +959,9 @@ void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
   // This is initialized to zero here and is set during
   // the evacuation pause if marking is in progress.
   _cur_satb_drain_time_ms = 0.0;
+  // This is initialized to zero here and is set during the evacuation
+  // pause if we actually waited for the root region scanning to finish.
+  _root_region_scan_wait_time_ms = 0.0;
 
   _last_gc_was_young = false;
 
@@ -1271,6 +1266,10 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
   // is in progress.
   other_time_ms -= _cur_satb_drain_time_ms;
 
+  // Subtract the root region scanning wait time. It's initialized to
+  // zero at the start of the pause.
+  other_time_ms -= _root_region_scan_wait_time_ms;
+
   if (parallel) {
     other_time_ms -= _cur_collection_par_time_ms;
   } else {
@@ -1303,6 +1302,8 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
       // each other. Therefore we unconditionally record the SATB drain
       // time - even if it's zero.
       body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+      body_summary->record_root_region_scan_wait_time_ms(
+                                            _root_region_scan_wait_time_ms);
 
       body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
       body_summary->record_satb_filtering_time_ms(satb_filtering_time);
@@ -1399,6 +1400,9 @@ void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
+    if (_root_region_scan_wait_time_ms > 0.0) {
+      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+    }
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
@@ -2002,6 +2006,7 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
+      print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq());
      if (parallel) {
        print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
        print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
@@ -2043,15 +2048,17 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
         // parallel
         NumberSeq* other_parts[] = {
           body_summary->get_satb_drain_seq(),
+          body_summary->get_root_region_scan_wait_seq(),
           body_summary->get_parallel_seq(),
           body_summary->get_clear_ct_seq()
         };
         calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                        3, other_parts);
+                                        4, other_parts);
       } else {
         // serial
         NumberSeq* other_parts[] = {
           body_summary->get_satb_drain_seq(),
+          body_summary->get_root_region_scan_wait_seq(),
           body_summary->get_update_rs_seq(),
           body_summary->get_ext_root_scan_seq(),
           body_summary->get_satb_filtering_seq(),
@@ -2059,7 +2066,7 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
           body_summary->get_obj_copy_seq()
         };
         calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                        6, other_parts);
+                                        7, other_parts);
       }
       check_other_times(1, summary->get_other_seq(), &calc_other_times_ms);
     }
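Illustrative note (not part of the patch): the "Other" time bookkeeping above is plain subtraction; every separately timed component, now including the root region scan wait, is removed from the total pause time. A toy version of that arithmetic (field names are invented for the example):

// Toy version of the "Other" time calculation in record_collection_pause_end().
// All values are in milliseconds.
struct PauseTimes {
  double total;
  double satb_drain;
  double root_region_scan_wait;  // new component added by this patch
  double parallel_work;          // or the sum of the serial components
  double clear_ct;
};

double other_time_ms(const PauseTimes& t) {
  return t.total - t.satb_drain - t.root_region_scan_wait
                 - t.parallel_work - t.clear_ct;
}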
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
index a7f521bf9a5..ba18a4a6ef5 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
@@ -65,6 +65,7 @@ public:
 
 class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain)   // optional
+  define_num_seq(root_region_scan_wait)
   define_num_seq(parallel)     // parallel only
     define_num_seq(ext_root_scan)
     define_num_seq(satb_filtering)
@@ -715,6 +716,7 @@ private:
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
   double _mark_closure_time_ms;
+  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -817,6 +819,10 @@ public:
     _mark_closure_time_ms = mark_closure_time_ms;
   }
 
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -1147,11 +1153,6 @@ public:
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
-  void tenure_all_objects() {
-    _max_survivor_regions = 0;
-    _tenuring_threshold = 0;
-  }
-
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp
index 1dd8cc765f5..84d998265a9 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp
@@ -127,7 +127,7 @@ public:
       // explicitly and all objects in the CSet are considered
      // (implicitly) live. So, we won't mark them explicitly and
      // we'll leave them over NTAMS.
-      _cm->grayRoot(obj, obj_size, _worker_id);
+      _cm->grayRoot(obj, obj_size, _worker_id, _hr);
     }
     _marked_bytes += (obj_size * HeapWordSize);
     obj->set_mark(markOopDesc::prototype());
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
index 0930d2d4dd8..f7ccb79cf3e 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
@@ -220,6 +220,7 @@ public:
 
 // Closure for iterating over object fields during concurrent marking
 class G1CMOopClosure : public OopClosure {
+private:
   G1CollectedHeap*   _g1h;
   ConcurrentMark*    _cm;
   CMTask*            _task;
@@ -230,4 +231,19 @@ public:
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
+// Closure to scan the root regions during concurrent marking
+class G1RootRegionScanClosure : public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  uint _worker_id;
+public:
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm,
+                          uint worker_id) :
+    _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(      oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
index 26b951d961f..5db2340bc07 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,8 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
@@ -53,7 +54,8 @@ template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
 
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -67,7 +69,8 @@ template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
 }
 
 // This closure is applied to the fields of the objects that have just been copied.
-template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParScanClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -96,7 +99,8 @@ template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
   }
 }
 
-template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -111,7 +115,8 @@ template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
   }
 }
 
-template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1CMOopClosure::do_oop_nv(T* p) {
   assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
   assert(!_g1h->is_on_master_free_list(
                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
@@ -125,4 +130,16 @@ template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) {
   _task->deal_with_reference(obj);
 }
 
+template <class T>
+inline void G1RootRegionScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      _cm->grayRoot(obj, obj->size(), _worker_id, hr);
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_INLINE_HPP
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
index 55d05d998fe..1498b94a4e9 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
@@ -72,10 +72,11 @@ inline void HeapRegion::note_end_of_marking() {
 }
 
 inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // During initial-mark we'll explicitly mark any objects on old
       // regions that are pointed to by roots. Given that explicit
       // marks only make sense under NTAMS it'd be nice if we could
@@ -84,11 +85,6 @@ inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
       // NTAMS to the end of the region so all marks will be below
      // NTAMS. We'll set it to the actual top when we retire this region.
      _next_top_at_mark_start = end();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // We could have re-used this old region as to-space over a
       // couple of GCs since the start of the concurrent marking
@@ -101,19 +97,15 @@ inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
 }
 
 inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
-  if (during_initial_mark) {
-    if (is_survivor()) {
-      assert(false, "should not allocate survivors during IM");
-    } else {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
       // See the comment for note_start_of_copying() for the details
       // on this.
       assert(_next_top_at_mark_start == end(), "pre-condition");
       _next_top_at_mark_start = top();
-    }
-  } else {
-    if (is_survivor()) {
-      // This is how we always allocate survivors.
-      assert(_next_top_at_mark_start == bottom(), "invariant");
     } else {
       // See the comment for note_start_of_copying() for the details
       // on this.
diff --git a/hotspot/src/share/vm/runtime/mutexLocker.cpp b/hotspot/src/share/vm/runtime/mutexLocker.cpp
index 7653b5bef84..dae98f08dcb 100644
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,13 +126,14 @@ Mutex*   OopMapCacheAlloc_lock       = NULL;
 Mutex*   FreeList_lock               = NULL;
 Monitor* SecondaryFreeList_lock      = NULL;
 Mutex*   OldSets_lock                = NULL;
+Monitor* RootRegionScan_lock         = NULL;
 Mutex*   MMUTracker_lock             = NULL;
 Mutex*   HotCardCache_lock           = NULL;
 Monitor* GCTaskManager_lock          = NULL;
 
 Mutex*   Management_lock             = NULL;
-Monitor* Service_lock = NULL;
+Monitor* Service_lock                = NULL;
 
 #define MAX_NUM_MUTEX 128
 static Monitor * _mutex_array[MAX_NUM_MUTEX];
@@ -193,6 +194,7 @@ void mutex_init() {
   def(FreeList_lock              , Mutex,   leaf     ,   true );
   def(SecondaryFreeList_lock     , Monitor, leaf     ,   true );
   def(OldSets_lock               , Mutex  , leaf     ,   true );
+  def(RootRegionScan_lock        , Monitor, leaf     ,   true );
   def(MMUTracker_lock            , Mutex  , leaf     ,   true );
   def(HotCardCache_lock          , Mutex  , special  ,   true );
   def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
diff --git a/hotspot/src/share/vm/runtime/mutexLocker.hpp b/hotspot/src/share/vm/runtime/mutexLocker.hpp
index 37a3d3132b4..884640595c5 100644
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,7 @@ extern Mutex*   OsrList_lock;                    // a lock used to serialize acc
 
 #ifndef PRODUCT
 extern Mutex*   FullGCALot_lock;                 // a lock to make FullGCALot MT safe
-#endif
+#endif // PRODUCT
 extern Mutex*   Debug1_lock;                     // A bunch of pre-allocated locks that can be used for tracing
 extern Mutex*   Debug2_lock;                     // down synchronization related bugs!
 extern Mutex*   Debug3_lock;
 
@@ -129,6 +129,7 @@ extern Mutex*   OopMapCacheAlloc_lock;           // protects allocation of oop_m
 extern Mutex*   FreeList_lock;                   // protects the free region list during safepoints
 extern Monitor* SecondaryFreeList_lock;          // protects the secondary free region list
 extern Mutex*   OldSets_lock;                    // protects the old region sets
+extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;                 // protects the MMU
                                                  // tracker data structures
 extern Mutex*   HotCardCache_lock;               // protects the hot card cache