From a4a4c34e961dfa163bb26e285df18d1324dcf739 Mon Sep 17 00:00:00 2001
From: Mikael Gerdin <mgerdin@openjdk.org>
Date: Thu, 14 Mar 2013 10:54:44 +0100
Subject: [PATCH 1/6] 8005602: NPG: classunloading does not happen while CMS GC
 with -XX:+CMSClassUnloadingEnabled is used

Call purge() on CLDG after sweep(), reorder purge() call in GenCollectedHeap

Reviewed-by: jmasa, stefank
---
 .../concurrentMarkSweep/concurrentMarkSweepGeneration.cpp | 4 ++++
 hotspot/src/share/vm/memory/genCollectedHeap.cpp          | 7 ++-----
 hotspot/src/share/vm/memory/metaspace.cpp                 | 8 +++++---
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index 1d056338c7d..df6aabbb527 100644
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -6068,6 +6068,10 @@ void CMSCollector::sweep(bool asynch) {
   verify_work_stacks_empty();
   verify_overflow_empty();
 
+  if (should_unload_classes()) {
+    ClassLoaderDataGraph::purge();
+  }
+
   _intra_sweep_timer.stop();
   _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
 
diff --git a/hotspot/src/share/vm/memory/genCollectedHeap.cpp b/hotspot/src/share/vm/memory/genCollectedHeap.cpp
index 9d65cc15701..6b7402c8203 100644
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp
@@ -554,6 +554,8 @@ void GenCollectedHeap::do_collection(bool  full,
     }
 
     if (complete) {
+      // Delete metaspaces for unloaded class loaders and clean up loader_data graph
+      ClassLoaderDataGraph::purge();
       // Resize the metaspace capacity after full collections
       MetaspaceGC::compute_new_size();
       update_full_collections_completed();
@@ -564,11 +566,6 @@ void GenCollectedHeap::do_collection(bool  full,
 
     gc_epilogue(complete);
 
-    // Delete metaspaces for unloaded class loaders and clean up loader_data graph
-    if (complete) {
-      ClassLoaderDataGraph::purge();
-    }
-
     if (must_restore_marks_for_biased_locking) {
       BiasedLocking::restore_marks();
     }
diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp
index 56b14c22cf7..9fa054fe0eb 100644
--- a/hotspot/src/share/vm/memory/metaspace.cpp
+++ b/hotspot/src/share/vm/memory/metaspace.cpp
@@ -1309,8 +1309,7 @@ void MetaspaceGC::compute_new_size() {
       gclog_or_tty->print_cr("  metaspace HWM: %.1fK", new_capacity_until_GC / (double) K);
     }
   }
-  assert(vsl->used_bytes_sum() == used_after_gc &&
-         used_after_gc <= vsl->capacity_bytes_sum(),
+  assert(used_after_gc <= vsl->capacity_bytes_sum(),
          "sanity check");
 
 }
@@ -1970,6 +1969,9 @@ void SpaceManager::initialize() {
 }
 
 SpaceManager::~SpaceManager() {
+  // This call this->_lock which can't be done while holding expand_lock()
+  const size_t in_use_before = sum_capacity_in_chunks_in_use();
+
   MutexLockerEx fcl(SpaceManager::expand_lock(),
                     Mutex::_no_safepoint_check_flag);
 
@@ -1987,7 +1989,7 @@ SpaceManager::~SpaceManager() {
 
   // Have to update before the chunks_in_use lists are emptied
   // below.
-  chunk_manager->inc_free_chunks_total(sum_capacity_in_chunks_in_use(),
+  chunk_manager->inc_free_chunks_total(in_use_before,
                                        sum_count_in_chunks_in_use());
 
   // Add all the chunks in use by this space manager

From e1df78e39021875579857673174090e73bad66b5 Mon Sep 17 00:00:00 2001
From: Thomas Schatzl <tschatzl@openjdk.org>
Date: Thu, 14 Mar 2013 09:37:38 +0100
Subject: [PATCH 2/6] 6733980: par compact - TraceGen1Time always shows 0.0000
 seconds

Use the correct collector to retrieve accumulated gen1 trace time

Reviewed-by: johnc, jmasa
---
 .../gc_implementation/parallelScavenge/parallelScavengeHeap.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
index 21e08b6f874..0c39e69e1be 100644
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
@@ -656,7 +656,7 @@ void ParallelScavengeHeap::print_tracing_info() const {
     tty->print_cr("[Accumulated GC generation 0 time %3.7f secs]", time);
   }
   if (TraceGen1Time) {
-    double time = PSMarkSweep::accumulated_time()->seconds();
+    double time = UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds();
     tty->print_cr("[Accumulated GC generation 1 time %3.7f secs]", time);
   }
 }

From d41c0fce7fc6a0ec7e8d0fbde8216db41aeab918 Mon Sep 17 00:00:00 2001
From: John Cuthbertson <johnc@openjdk.org>
Date: Mon, 18 Mar 2013 11:05:27 -0700
Subject: [PATCH 3/6] 8009536: G1: Apache Lucene hang during reference
 processing

In CMTask::do_marking_step(), Skip offering termination and entering the first and second synchronization barriers if called from a serial context, i.e. the VM thread.

Reviewed-by: brutisso, tschatzl
---
 .../gc_implementation/g1/concurrentMark.cpp   | 186 +++++++++++-------
 .../gc_implementation/g1/concurrentMark.hpp   |   6 +-
 2 files changed, 116 insertions(+), 76 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index 2a462481faa..63cb028da1f 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -1065,8 +1065,8 @@ public:
         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 
         the_task->do_marking_step(mark_step_duration_ms,
-                                  true /* do_stealing    */,
-                                  true /* do_termination */);
+                                  true  /* do_termination */,
+                                  false /* is_serial*/);
 
         double end_time_sec = os::elapsedTime();
         double end_vtime_sec = os::elapsedVTime();
@@ -2184,14 +2184,17 @@ bool G1CMIsAliveClosure::do_object_b(oop obj) {
 // operating on the global stack.
 
 class G1CMKeepAliveAndDrainClosure: public OopClosure {
-  ConcurrentMark*  _cm;
-  CMTask*          _task;
-  int              _ref_counter_limit;
-  int              _ref_counter;
+  ConcurrentMark* _cm;
+  CMTask*         _task;
+  int             _ref_counter_limit;
+  int             _ref_counter;
+  bool            _is_serial;
  public:
-  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval) {
+  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
+    _cm(cm), _task(task), _is_serial(is_serial),
+    _ref_counter_limit(G1RefProcDrainInterval) {
     assert(_ref_counter_limit > 0, "sanity");
+    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
     _ref_counter = _ref_counter_limit;
   }
 
@@ -2230,8 +2233,8 @@ class G1CMKeepAliveAndDrainClosure: public OopClosure {
         do {
           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
           _task->do_marking_step(mark_step_duration_ms,
-                                 false /* do_stealing    */,
-                                 false /* do_termination */);
+                                 false      /* do_termination */,
+                                 _is_serial);
         } while (_task->has_aborted() && !_cm->has_overflown());
         _ref_counter = _ref_counter_limit;
       }
@@ -2253,27 +2256,18 @@ class G1CMKeepAliveAndDrainClosure: public OopClosure {
 class G1CMDrainMarkingStackClosure: public VoidClosure {
   ConcurrentMark* _cm;
   CMTask*         _task;
-  bool            _do_stealing;
-  bool            _do_termination;
+  bool            _is_serial;
  public:
-  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_par) :
-    _cm(cm), _task(task) {
-    assert(is_par || _task->worker_id() == 0,
-           "Only task for worker 0 should be used if ref processing is single threaded");
-    // We only allow stealing and only enter the termination protocol
-    // in CMTask::do_marking_step() if this closure is being instantiated
-    // for parallel reference processing.
-    _do_stealing = _do_termination = is_par;
+  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
+    _cm(cm), _task(task), _is_serial(is_serial) {
+    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
   }
 
   void do_void() {
     do {
       if (_cm->verbose_high()) {
-        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - "
-                               "stealing: %s, termination: %s",
-                               _task->worker_id(),
-                               BOOL_TO_STR(_do_stealing),
-                               BOOL_TO_STR(_do_termination));
+        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
+                               _task->worker_id(), BOOL_TO_STR(_is_serial));
       }
 
       // We call CMTask::do_marking_step() to completely drain the local
@@ -2294,8 +2288,8 @@ class G1CMDrainMarkingStackClosure: public VoidClosure {
       // has_aborted() flag that the marking step has completed.
 
       _task->do_marking_step(1000000000.0 /* something very large */,
-                             _do_stealing,
-                             _do_termination);
+                             true         /* do_termination */,
+                             _is_serial);
     } while (_task->has_aborted() && !_cm->has_overflown());
   }
 };
@@ -2328,7 +2322,6 @@ class G1CMRefProcTaskProxy: public AbstractGangTask {
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
-  bool             _processing_is_mt;
 
 public:
   G1CMRefProcTaskProxy(ProcessTask& proc_task,
@@ -2336,15 +2329,15 @@ public:
                      ConcurrentMark* cm) :
     AbstractGangTask("Process reference objects in parallel"),
     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
-      ReferenceProcessor* rp = _g1h->ref_processor_cm();
-      _processing_is_mt = rp->processing_is_mt();
-    }
+    ReferenceProcessor* rp = _g1h->ref_processor_cm();
+    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
+  }
 
   virtual void work(uint worker_id) {
-    CMTask* marking_task = _cm->task(worker_id);
+    CMTask* task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
-    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
-    G1CMDrainMarkingStackClosure g1_par_drain(_cm, marking_task, _processing_is_mt);
+    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
+    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
 
     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
   }
@@ -2415,26 +2408,35 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
     rp->setup_policy(clear_all_soft_refs);
     assert(_markStack.isEmpty(), "mark stack should be empty");
 
-    // Non-MT instances 'Keep Alive' and 'Complete GC' oop closures.
-    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0));
-    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), false);
-
-    // We need at least one active thread. If reference processing is
-    // not multi-threaded we use the current (ConcurrentMarkThread) thread,
-    // otherwise we use the work gang from the G1CollectedHeap and we
-    // utilize all the worker threads we can.
-    uint active_workers = (rp->processing_is_mt() && g1h->workers() != NULL
-                                ? g1h->workers()->active_workers()
-                                : 1U);
+    // Instances of the 'Keep Alive' and 'Complete GC' closures used
+    // in serial reference processing. Note these closures are also
+    // used for serially processing (by the the current thread) the
+    // JNI references during parallel reference processing.
+    //
+    // These closures do not need to synchronize with the worker
+    // threads involved in parallel reference processing as these
+    // instances are executed serially by the current thread (e.g.
+    // reference processing is not multi-threaded and is thus
+    // performed by the current thread instead of a gang worker).
+    //
+    // The gang tasks involved in parallel reference procssing create
+    // their own instances of these closures, which do their own
+    // synchronization among themselves.
+    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
+    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
 
+    // We need at least one active thread. If reference processing
+    // is not multi-threaded we use the current (VMThread) thread,
+    // otherwise we use the work gang from the G1CollectedHeap and
+    // we utilize all the worker threads we can.
+    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
+    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
 
+    // Parallel processing task executor.
     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
-
-    AbstractRefProcTaskExecutor* executor = (rp->processing_is_mt()
-                                                ? &par_task_executor
-                                                : NULL);
+    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
 
     // Set the degree of MT processing here.  If the discovery was done MT,
     // the number of threads involved during discovery could differ from
@@ -2454,6 +2456,7 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
 
     assert(_markStack.overflow() || _markStack.isEmpty(),
             "mark stack should be empty (unless it overflowed)");
+
     if (_markStack.overflow()) {
       // This should have been done already when we tried to push an
       // entry on to the global mark stack. But let's do it again.
@@ -2482,8 +2485,8 @@ void ConcurrentMark::swapMarkBitMaps() {
 
 class CMRemarkTask: public AbstractGangTask {
 private:
-  ConcurrentMark *_cm;
-
+  ConcurrentMark* _cm;
+  bool            _is_serial;
 public:
   void work(uint worker_id) {
     // Since all available tasks are actually started, we should
@@ -2493,8 +2496,8 @@ public:
       task->record_start_time();
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
-                              true /* do_stealing    */,
-                              true /* do_termination */);
+                              true         /* do_termination       */,
+                              _is_serial);
       } while (task->has_aborted() && !_cm->has_overflown());
       // If we overflow, then we do not want to restart. We instead
       // want to abort remark and do concurrent marking again.
@@ -2502,8 +2505,8 @@ public:
     }
   }
 
-  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
-    AbstractGangTask("Par Remark"), _cm(cm) {
+  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
+    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
     _cm->terminator()->reset_for_reuse(active_workers);
   }
 };
@@ -2530,20 +2533,26 @@ void ConcurrentMark::checkpointRootsFinalWork() {
     // constructor and pass values of the active workers
     // through the gang in the task.
 
-    CMRemarkTask remarkTask(this, active_workers);
+    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
     g1h->set_par_threads(active_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
-    // this is remark, so we'll use up all available threads
     uint active_workers = 1;
     set_phase(active_workers, false /* concurrent */);
 
-    CMRemarkTask remarkTask(this, active_workers);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
+    // Note - if there's no work gang then the VMThread will be
+    // the thread to execute the remark - serially. We have
+    // to pass true for the is_serial parameter so that
+    // CMTask::do_marking_step() doesn't enter the sync
+    // barriers in the event of an overflow. Doing so will
+    // cause an assert that the current thread is not a
+    // concurrent GC thread.
+    CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
     remarkTask.work(0);
   }
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
@@ -3854,8 +3863,8 @@ void CMTask::print_stats() {
 
 /*****************************************************************************
 
-    The do_marking_step(time_target_ms) method is the building block
-    of the parallel marking framework. It can be called in parallel
+    The do_marking_step(time_target_ms, ...) method is the building
+    block of the parallel marking framework. It can be called in parallel
     with other invocations of do_marking_step() on different tasks
     (but only one per task, obviously) and concurrently with the
     mutator threads, or during remark, hence it eliminates the need
@@ -3865,7 +3874,7 @@ void CMTask::print_stats() {
     pauses too, since do_marking_step() ensures that it aborts before
     it needs to yield.
 
-    The data structures that is uses to do marking work are the
+    The data structures that it uses to do marking work are the
     following:
 
       (1) Marking Bitmap. If there are gray objects that appear only
@@ -3951,11 +3960,25 @@ void CMTask::print_stats() {
     place, it was natural to piggy-back all the other conditions on it
     too and not constantly check them throughout the code.
 
+    If do_termination is true then do_marking_step will enter its
+    termination protocol.
+
+    The value of is_serial must be true when do_marking_step is being
+    called serially (i.e. by the VMThread) and do_marking_step should
+    skip any synchronization in the termination and overflow code.
+    Examples include the serial remark code and the serial reference
+    processing closures.
+
+    The value of is_serial must be false when do_marking_step is
+    being called by any of the worker threads in a work gang.
+    Examples include the concurrent marking code (CMMarkingTask),
+    the MT remark code, and the MT reference processing closures.
+
  *****************************************************************************/
 
 void CMTask::do_marking_step(double time_target_ms,
-                             bool do_stealing,
-                             bool do_termination) {
+                             bool do_termination,
+                             bool is_serial) {
   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
   assert(concurrent() == _cm->concurrent(), "they should be the same");
 
@@ -3976,6 +3999,12 @@ void CMTask::do_marking_step(double time_target_ms,
   _start_time_ms = os::elapsedVTime() * 1000.0;
   statsOnly( _interval_start_time_ms = _start_time_ms );
 
+  // If do_stealing is true then do_marking_step will attempt to
+  // steal work from the other CMTasks. It only makes sense to
+  // enable stealing when the termination protocol is enabled
+  // and do_marking_step() is not being called serially.
+  bool do_stealing = do_termination && !is_serial;
+
   double diff_prediction_ms =
     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
   _time_target_ms = time_target_ms - diff_prediction_ms;
@@ -4237,10 +4266,12 @@ void CMTask::do_marking_step(double time_target_ms,
     }
 
     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
+
     // The CMTask class also extends the TerminatorTerminator class,
     // hence its should_exit_termination() method will also decide
     // whether to exit the termination protocol or not.
-    bool finished = _cm->terminator()->offer_termination(this);
+    bool finished = (is_serial ||
+                     _cm->terminator()->offer_termination(this));
     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
     _termination_time_ms +=
       termination_end_time_ms - _termination_start_time_ms;
@@ -4320,19 +4351,26 @@ void CMTask::do_marking_step(double time_target_ms,
         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
       }
 
-      _cm->enter_first_sync_barrier(_worker_id);
-      // When we exit this sync barrier we know that all tasks have
-      // stopped doing marking work. So, it's now safe to
-      // re-initialise our data structures. At the end of this method,
-      // task 0 will clear the global data structures.
+      if (!is_serial) {
+        // We only need to enter the sync barrier if being called
+        // from a parallel context
+        _cm->enter_first_sync_barrier(_worker_id);
+
+        // When we exit this sync barrier we know that all tasks have
+        // stopped doing marking work. So, it's now safe to
+        // re-initialise our data structures. At the end of this method,
+        // task 0 will clear the global data structures.
+      }
 
       statsOnly( ++_aborted_overflow );
 
       // We clear the local state of this task...
       clear_region_fields();
 
-      // ...and enter the second barrier.
-      _cm->enter_second_sync_barrier(_worker_id);
+      if (!is_serial) {
+        // ...and enter the second barrier.
+        _cm->enter_second_sync_barrier(_worker_id);
+      }
       // At this point everything has bee re-initialised and we're
       // ready to restart.
     }
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index 61e64df8b9c..b8dd467b30b 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -166,7 +166,7 @@ class CMBitMap : public CMBitMapRO {
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   VirtualSpace _virtual_space;   // Underlying backing store for actual stack
   ConcurrentMark* _cm;
-  oop*   _base;        // bottom of stack
+  oop* _base;        // bottom of stack
   jint _index;       // one more than last occupied index
   jint _capacity;    // max #elements
   jint _saved_index; // value of _index saved at start of GC
@@ -1146,7 +1146,9 @@ public:
   // trying not to exceed the given duration. However, it might exit
   // prematurely, according to some conditions (i.e. SATB buffers are
   // available for processing).
-  void do_marking_step(double target_ms, bool do_stealing, bool do_termination);
+  void do_marking_step(double target_ms,
+                       bool do_termination,
+                       bool is_serial);
 
   // These two calls start and stop the timer
   void record_start_time() {

From 9164834d731f0ac504382a29a8f0e1c8d8c6236c Mon Sep 17 00:00:00 2001
From: John Cuthbertson <johnc@openjdk.org>
Date: Tue, 19 Mar 2013 00:57:39 -0700
Subject: [PATCH 4/6] 8009940: G1: assert(_finger == _heap_end) failed,
 concurrentMark.cpp:809

Skip reference processing if the global marking stack overflows during remark. Refactor and rename set_phase(); move code that sets the concurrency level into its own routine. Do not call set_phase() from within parallel reference processing; use the concurrency level routine instead. The marking state should only set reset by CMTask[0] during the concurrent phase of the marking cycle; if an overflow occurs at any stage during the remark, the marking state will be reset after reference processing.

Reviewed-by: brutisso, jmasa
---
 .../gc_implementation/g1/concurrentMark.cpp   | 88 +++++++++++++------
 .../gc_implementation/g1/concurrentMark.hpp   |  5 +-
 2 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index 63cb028da1f..d98b06ac705 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -784,7 +784,7 @@ void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   }
 }
 
-void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
+void ConcurrentMark::set_concurrency(uint active_tasks) {
   assert(active_tasks <= _max_worker_id, "we should not have more");
 
   _active_tasks = active_tasks;
@@ -793,6 +793,10 @@ void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
+}
+
+void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
+  set_concurrency(active_tasks);
 
   _concurrent = concurrent;
   // We propagate this to all tasks, not just the active ones.
@@ -806,7 +810,9 @@ void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
     // false before we start remark. At this point we should also be
     // in a STW phase.
     assert(!concurrent_marking_in_progress(), "invariant");
-    assert(_finger == _heap_end, "only way to get here");
+    assert(_finger == _heap_end,
+           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
+                   _finger, _heap_end));
     update_g1_committed(true);
   }
 }
@@ -974,20 +980,28 @@ void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
   }
 
-  // let the task associated with with worker 0 do this
-  if (worker_id == 0) {
-    // task 0 is responsible for clearing the global data structures
-    // We should be here because of an overflow. During STW we should
-    // not clear the overflow flag since we rely on it being true when
-    // we exit this method to abort the pause and restart concurent
-    // marking.
-    reset_marking_state(concurrent() /* clear_overflow */);
-    force_overflow()->update();
+  // If we're executing the concurrent phase of marking, reset the marking
+  // state; otherwise the marking state is reset after reference processing,
+  // during the remark pause.
+  // If we reset here as a result of an overflow during the remark we will
+  // see assertion failures from any subsequent set_concurrency_and_phase()
+  // calls.
+  if (concurrent()) {
+    // let the task associated with with worker 0 do this
+    if (worker_id == 0) {
+      // task 0 is responsible for clearing the global data structures
+      // We should be here because of an overflow. During STW we should
+      // not clear the overflow flag since we rely on it being true when
+      // we exit this method to abort the pause and restart concurent
+      // marking.
+      reset_marking_state(true /* clear_overflow */);
+      force_overflow()->update();
 
-    if (G1Log::fine()) {
-      gclog_or_tty->date_stamp(PrintGCDateStamps);
-      gclog_or_tty->stamp(PrintGCTimeStamps);
-      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
+      if (G1Log::fine()) {
+        gclog_or_tty->date_stamp(PrintGCDateStamps);
+        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
+      }
     }
   }
 
@@ -1007,7 +1021,7 @@ void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
   if (concurrent()) {
     ConcurrentGCThread::stsJoin();
   }
-  // at this point everything should be re-initialised and ready to go
+  // at this point everything should be re-initialized and ready to go
 
   if (verbose_low()) {
     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
@@ -1222,8 +1236,8 @@ void ConcurrentMark::markFromRoots() {
 
   uint active_workers = MAX2(1U, parallel_marking_threads());
 
-  // Parallel task terminator is set in "set_phase()"
-  set_phase(active_workers, true /* concurrent */);
+  // Parallel task terminator is set in "set_concurrency_and_phase()"
+  set_concurrency_and_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (use_parallel_marking_threads()) {
@@ -2349,9 +2363,11 @@ void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
 
   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
 
-  // We need to reset the phase for each task execution so that
-  // the termination protocol of CMTask::do_marking_step works.
-  _cm->set_phase(_active_workers, false /* concurrent */);
+  // We need to reset the concurrency level before each
+  // proxy task execution, so that the termination protocol
+  // and overflow handling in CMTask::do_marking_step() knows
+  // how many workers to wait for.
+  _cm->set_concurrency(_active_workers);
   _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
   _g1h->set_par_threads(0);
@@ -2377,12 +2393,29 @@ void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
 
   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
 
+  // Not strictly necessary but...
+  //
+  // We need to reset the concurrency level before each
+  // proxy task execution, so that the termination protocol
+  // and overflow handling in CMTask::do_marking_step() knows
+  // how many workers to wait for.
+  _cm->set_concurrency(_active_workers);
   _g1h->set_par_threads(_active_workers);
   _workers->run_task(&enq_task_proxy);
   _g1h->set_par_threads(0);
 }
 
 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+  if (has_overflown()) {
+    // Skip processing the discovered references if we have
+    // overflown the global marking stack. Reference objects
+    // only get discovered once so it is OK to not
+    // de-populate the discovered reference lists. We could have,
+    // but the only benefit would be that, when marking restarts,
+    // less reference objects are discovered.
+    return;
+  }
+
   ResourceMark rm;
   HandleMark   hm;
 
@@ -2438,6 +2471,10 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
                                               g1h->workers(), active_workers);
     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
 
+    // Set the concurrency level. The phase was already set prior to
+    // executing the remark task.
+    set_concurrency(active_workers);
+
     // Set the degree of MT processing here.  If the discovery was done MT,
     // the number of threads involved during discovery could differ from
     // the number of active workers.  This is OK as long as the discovered
@@ -2527,7 +2564,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
       active_workers = (uint) ParallelGCThreads;
       g1h->workers()->set_active_workers(active_workers);
     }
-    set_phase(active_workers, false /* concurrent */);
+    set_concurrency_and_phase(active_workers, false /* concurrent */);
     // Leave _parallel_marking_threads at it's
     // value originally calculated in the ConcurrentMark
     // constructor and pass values of the active workers
@@ -2543,7 +2580,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     uint active_workers = 1;
-    set_phase(active_workers, false /* concurrent */);
+    set_concurrency_and_phase(active_workers, false /* concurrent */);
 
     // Note - if there's no work gang then the VMThread will be
     // the thread to execute the remark - serially. We have
@@ -3923,7 +3960,7 @@ void CMTask::print_stats() {
       (2) When a global overflow (on the global stack) has been
       triggered. Before the task aborts, it will actually sync up with
       the other tasks to ensure that all the marking data structures
-      (local queues, stacks, fingers etc.)  are re-initialised so that
+      (local queues, stacks, fingers etc.)  are re-initialized so that
       when do_marking_step() completes, the marking phase can
       immediately restart.
 
@@ -4371,7 +4408,8 @@ void CMTask::do_marking_step(double time_target_ms,
         // ...and enter the second barrier.
         _cm->enter_second_sync_barrier(_worker_id);
       }
-      // At this point everything has bee re-initialised and we're
+      // At this point, if we're during the concurrent phase of
+      // marking, everything has been re-initialized and we're
       // ready to restart.
     }
 
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index b8dd467b30b..43554a5c991 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -491,9 +491,12 @@ protected:
   // structures are initialised to a sensible and predictable state.
   void set_non_marking_state();
 
+  // Called to indicate how many threads are currently active.
+  void set_concurrency(uint active_tasks);
+
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
-  void set_phase(uint active_tasks, bool concurrent);
+  void set_concurrency_and_phase(uint active_tasks, bool concurrent);
 
   // prints all gathered CM-related statistics
   void print_stats();

From a8087d1cb647e7d7ff4410e091378de7b7ce76d6 Mon Sep 17 00:00:00 2001
From: John Cuthbertson <johnc@openjdk.org>
Date: Tue, 19 Mar 2013 09:38:37 -0700
Subject: [PATCH 5/6] 8008301: G1:
 guarantee(satb_mq_set.completed_buffers_num() == 0) failure

If the marking stack overflows while the marking tasks are draining the SATB buffers, remark will exit with some SATB buffers left unprocessed. Relax the guarantee to allow for overflow.

Reviewed-by: jmasa, brutisso
---
 .../gc_implementation/g1/concurrentMark.cpp   | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index d98b06ac705..d9d70620e47 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -1289,12 +1289,22 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
   if (has_overflown()) {
     // Oops.  We overflowed.  Restart concurrent marking.
     _restart_for_overflow = true;
-    // Clear the marking state because we will be restarting
-    // marking due to overflowing the global mark stack.
-    reset_marking_state();
     if (G1TraceMarkStackOverflow) {
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
     }
+
+    // Verify the heap w.r.t. the previous marking bitmap.
+    if (VerifyDuringGC) {
+      HandleMark hm;  // handle scope
+      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
+      Universe::heap()->prepare_for_verify();
+      Universe::verify(/* silent */ false,
+                       /* option */ VerifyOption_G1UsePrevMarking);
+    }
+
+    // Clear the marking state because we will be restarting
+    // marking due to overflowing the global mark stack.
+    reset_marking_state();
   } else {
     // Aggregate the per-task counting data that we have accumulated
     // while marking.
@@ -2593,7 +2603,11 @@ void ConcurrentMark::checkpointRootsFinalWork() {
     remarkTask.work(0);
   }
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
+  guarantee(has_overflown() ||
+            satb_mq_set.completed_buffers_num() == 0,
+            err_msg("Invariant: has_overflown = %s, num buffers = %d",
+                    BOOL_TO_STR(has_overflown()),
+                    satb_mq_set.completed_buffers_num()));
 
   print_stats();
 }

From 6bec5bf02c19c41794a739e399d21b76dd67613f Mon Sep 17 00:00:00 2001
From: Mikael Gerdin <mgerdin@openjdk.org>
Date: Thu, 21 Mar 2013 09:07:43 +0100
Subject: [PATCH 6/6] 8004241: NPG: Metaspace occupies more memory than
 specified by -XX:MaxMetaspaceSize option

Enforce MaxMetaspaceSize for both metaspace parts, check MaxMetaspaceSize against "reserved", not "capacity"

Reviewed-by: jmasa, johnc
---
 hotspot/src/share/vm/memory/metaspace.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp
index f7861121383..fc4ed3d376e 100644
--- a/hotspot/src/share/vm/memory/metaspace.cpp
+++ b/hotspot/src/share/vm/memory/metaspace.cpp
@@ -1100,25 +1100,24 @@ size_t MetaspaceGC::delta_capacity_until_GC(size_t word_size) {
 }
 
 bool MetaspaceGC::should_expand(VirtualSpaceList* vsl, size_t word_size) {
+  // If the user wants a limit, impose one.
+  if (!FLAG_IS_DEFAULT(MaxMetaspaceSize) &&
+      MetaspaceAux::reserved_in_bytes() >= MaxMetaspaceSize) {
+    return false;
+  }
 
   // Class virtual space should always be expanded.  Call GC for the other
   // metadata virtual space.
   if (vsl == Metaspace::class_space_list()) return true;
 
-  // If the user wants a limit, impose one.
-  size_t max_metaspace_size_words = MaxMetaspaceSize / BytesPerWord;
-  size_t metaspace_size_words = MetaspaceSize / BytesPerWord;
-  if (!FLAG_IS_DEFAULT(MaxMetaspaceSize) &&
-      vsl->capacity_words_sum() >= max_metaspace_size_words) {
-    return false;
-  }
-
   // If this is part of an allocation after a GC, expand
   // unconditionally.
   if(MetaspaceGC::expand_after_GC()) {
     return true;
   }
 
+  size_t metaspace_size_words = MetaspaceSize / BytesPerWord;
+
   // If the capacity is below the minimum capacity, allow the
   // expansion.  Also set the high-water-mark (capacity_until_GC)
   // to that minimum capacity so that a GC will not be induced