8181143: Introduce diagnostic flag to abort VM on too long VM operations

Reviewed-by: rkennke, zgu, dholmes, stuefe, rehn
This commit is contained in:
Aleksey Shipilev 2018-12-13 16:45:24 +01:00
parent 650f3fc113
commit 2278601b7c
4 changed files with 163 additions and 0 deletions

View File

@ -501,6 +501,13 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
diagnostic(bool, AbortVMOnSafepointTimeout, false, \
"Abort upon failure to reach safepoint (see SafepointTimeout)") \
\
diagnostic(bool, AbortVMOnVMOperationTimeout, false, \
"Abort upon failure to complete VM operation promptly") \
\
diagnostic(intx, AbortVMOnVMOperationTimeoutDelay, 1000, \
"Delay in milliseconds for option AbortVMOnVMOperationTimeout") \
range(0, max_intx) \
\
/* 50 retries * (5 * current_retry_count) millis = ~6.375 seconds */ \
/* typically, at most a few retries are needed */ \
product(intx, SuspendRetryCount, 50, \

View File

@ -28,6 +28,7 @@
#include "jfr/jfrEvents.hpp"
#include "jfr/support/jfrThreadId.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "logging/logConfiguration.hpp"
#include "memory/resourceArea.hpp"
#include "oops/method.hpp"
@ -197,6 +198,32 @@ void VMOperationQueue::oops_do(OopClosure* f) {
drain_list_oops_do(f);
}
//------------------------------------------------------------------------------------------------------------------
// Timeout machinery
void VMOperationTimeoutTask::task() {
assert(AbortVMOnVMOperationTimeout, "only if enabled");
if (is_armed()) {
jlong delay = (os::javaTimeMillis() - _arm_time);
if (delay > AbortVMOnVMOperationTimeoutDelay) {
fatal("VM operation took too long: " SIZE_FORMAT " ms (timeout: " SIZE_FORMAT " ms)",
delay, AbortVMOnVMOperationTimeoutDelay);
}
}
}
// Tells whether the task is currently armed. The flag is read with acquire
// semantics, pairing with the release stores done in arm() and disarm().
bool VMOperationTimeoutTask::is_armed() {
  const int armed_flag = OrderAccess::load_acquire(&_armed);
  return armed_flag != 0;
}
// Starts timing: records the current os::javaTimeMillis() value and then marks
// the task as armed.
void VMOperationTimeoutTask::arm() {
// Stamp first: the store below publishes the armed flag, and task() reads
// _arm_time only after is_armed() has observed that flag as set.
_arm_time = os::javaTimeMillis();
OrderAccess::release_store_fence(&_armed, 1);
}
// Stops timing: clears the armed flag so that task() skips its timeout check.
// _arm_time is left untouched; it is overwritten by the next arm().
void VMOperationTimeoutTask::disarm() {
OrderAccess::release_store_fence(&_armed, 0);
}
//------------------------------------------------------------------------------------------------------------------
// Implementation of VMThread stuff
@ -209,12 +236,28 @@ VM_Operation* VMThread::_cur_vm_operation = NULL;
VMOperationQueue* VMThread::_vm_queue = NULL;
PerfCounter* VMThread::_perf_accumulated_vm_operation_time = NULL;
const char* VMThread::_no_op_reason = NULL;
// Stays NULL unless VMThread::create() runs with AbortVMOnVMOperationTimeout enabled.
VMOperationTimeoutTask* VMThread::_timeout_task = NULL;
void VMThread::create() {
assert(vm_thread() == NULL, "we can only allocate one VMThread");
_vm_thread = new VMThread();
if (AbortVMOnVMOperationTimeout) {
// Make sure we call the timeout task frequently enough, but not too frequently.
// Try to make the interval 10% of the timeout delay, so that we miss the timeout
// by those 10% at most. The periodic task also expects it to fit min/max intervals.
size_t interval = (size_t)AbortVMOnVMOperationTimeoutDelay / 10;
interval = interval / PeriodicTask::interval_gran * PeriodicTask::interval_gran;
interval = MAX2<size_t>(interval, PeriodicTask::min_interval);
interval = MIN2<size_t>(interval, PeriodicTask::max_interval);
_timeout_task = new VMOperationTimeoutTask(interval);
_timeout_task->enroll();
} else {
assert(_timeout_task == NULL, "sanity");
}
// Create VM operation queue
_vm_queue = new VMOperationQueue();
guarantee(_vm_queue != NULL, "just checking");
@ -492,6 +535,11 @@ void VMThread::loop() {
_vm_queue->set_drain_list(safepoint_ops); // ensure ops can be scanned
SafepointSynchronize::begin();
if (_timeout_task != NULL) {
_timeout_task->arm();
}
evaluate_operation(_cur_vm_operation);
// now process all queued safepoint ops, iteratively draining
// the queue until there are none left
@ -533,6 +581,10 @@ void VMThread::loop() {
_vm_queue->set_drain_list(NULL);
if (_timeout_task != NULL) {
_timeout_task->disarm();
}
// Complete safepoint synchronization
SafepointSynchronize::end();

View File

@ -27,6 +27,7 @@
#include "runtime/perfData.hpp"
#include "runtime/thread.hpp"
#include "runtime/task.hpp"
#include "runtime/vmOperations.hpp"
//
@ -84,6 +85,26 @@ class VMOperationQueue : public CHeapObj<mtInternal> {
};
// VM operation timeout handling: abort the VM when a VM operation takes
// too long. Periodic tasks do not participate in safepoint protocol, and therefore
// can fire when application threads are stopped.
class VMOperationTimeoutTask : public PeriodicTask {
private:
// Non-zero while a VM operation is being timed; accessed through OrderAccess.
volatile int _armed;
// os::javaTimeMillis() value captured by the most recent arm().
jlong _arm_time;
public:
// interval_time is forwarded to PeriodicTask; the task starts out disarmed.
VMOperationTimeoutTask(size_t interval_time) :
PeriodicTask(interval_time), _armed(0), _arm_time(0) {}
// Periodic callback: aborts the VM if armed and the time since arm() exceeds
// AbortVMOnVMOperationTimeoutDelay milliseconds.
virtual void task();
// True between arm() and the following disarm().
bool is_armed();
// Records the start time and sets the armed flag.
void arm();
// Clears the armed flag.
void disarm();
};
//
// A single VMThread (the primordial thread) spawns all other threads
// and is itself used by other threads to offload heavy vm operations
@ -101,6 +122,8 @@ class VMThread: public NamedThread {
static const char* _no_op_reason;
static VMOperationTimeoutTask* _timeout_task;
static bool no_op_safepoint_needed(bool check_time);
void evaluate_operation(VM_Operation* op);

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2018, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import jdk.test.lib.*;
import jdk.test.lib.process.*;
/*
* @test TestAbortOnVMOperationTimeout
* @bug 8181143
* @summary Check abort on VM timeout is working
* @library /test/lib
* @modules java.base/jdk.internal.misc
* java.management
*/
public class TestAbortOnVMOperationTimeout {

    /**
     * Serves two roles. Without arguments it is the driver, which launches
     * child VMs with various timeout delays. With any argument it is the
     * child workload, which allocates 10M objects and exits.
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 0) {
            // Child mode: fill a large array with fresh objects, then leave.
            Object[] objects = new Object[10_000_000];
            for (int idx = 0; idx < objects.length; idx++) {
                objects[idx] = new Object();
            }
            return;
        }

        // Generous delays (over a minute): Serial is expected to finish well
        // within them, so the child must exit normally. The values are
        // intentionally not round, to exercise periodic task granularity.
        int[] generousDelays = {63423, 12388131};
        for (int delay : generousDelays) {
            testWith(delay, true);
        }

        // Tiny delays: the child is expected to abort. Even the largest one,
        // 5 ms, would leave only 0.5 ns per object for 10M objects, which
        // Serial cannot achieve.
        int[] tinyDelays = {0, 1, 5};
        for (int delay : tinyDelays) {
            testWith(delay, false);
        }
    }

    /**
     * Runs this class as a child VM with AbortVMOnVMOperationTimeout enabled
     * and the given delay (in milliseconds), then checks the outcome.
     *
     * @param delay      value passed to -XX:AbortVMOnVMOperationTimeoutDelay
     * @param shouldPass true if the child must exit with value 0; false if it
     *                   must report the timeout and exit with a non-zero value
     */
    public static void testWith(int delay, boolean shouldPass) throws Exception {
        String delayFlag = "-XX:AbortVMOnVMOperationTimeoutDelay=" + delay;
        ProcessBuilder childBuilder = ProcessTools.createJavaProcessBuilder(
            "-XX:+UnlockDiagnosticVMOptions",
            "-XX:+AbortVMOnVMOperationTimeout",
            delayFlag,
            "-Xmx256m",
            "-XX:+UseSerialGC",
            "-XX:-CreateCoredumpOnCrash",
            "TestAbortOnVMOperationTimeout",
            "foo"
        );

        Process child = childBuilder.start();
        OutputAnalyzer analyzer = new OutputAnalyzer(child);

        if (!shouldPass) {
            // Timeout expected: the abort message must show up and the exit
            // value must be non-zero.
            analyzer.shouldMatch("VM operation took too long");
            analyzer.shouldNotHaveExitValue(0);
            return;
        }
        analyzer.shouldHaveExitValue(0);
    }
}