8239024: Kitchensink24HStress.java failed due to timeout

Reviewed-by: egahlin
This commit is contained in:
Markus Grönlund 2020-06-11 10:56:59 +02:00
parent 0228a5c767
commit 83d0318ea1
5 changed files with 115 additions and 87 deletions

View File

@ -58,10 +58,6 @@ void EventEmitter::emit(ObjectSampler* sampler, int64_t cutoff_ticks, bool emit_
EdgeStore edge_store;
if (cutoff_ticks <= 0) {
// no reference chains
MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
// The lock is needed here to prevent the recorder thread (running flush())
// from writing old object events out from the thread local buffer
// before the required constant pools have been serialized.
JfrTicks time_stamp = JfrTicks::now();
EventEmitter emitter(time_stamp, time_stamp);
emitter.write_events(sampler, &edge_store, emit_all);
@ -73,7 +69,6 @@ void EventEmitter::emit(ObjectSampler* sampler, int64_t cutoff_ticks, bool emit_
}
size_t EventEmitter::write_events(ObjectSampler* object_sampler, EdgeStore* edge_store, bool emit_all) {
assert_locked_or_safepoint(JfrStream_lock);
assert(_thread == Thread::current(), "invariant");
assert(_thread->jfr_thread_local() == _jfr_thread_local, "invariant");
assert(object_sampler != NULL, "invariant");
@ -82,19 +77,32 @@ size_t EventEmitter::write_events(ObjectSampler* object_sampler, EdgeStore* edge
const jlong last_sweep = emit_all ? max_jlong : object_sampler->last_sweep().value();
size_t count = 0;
// First pass associates a live sample with its immediate edge
// in preparation for writing checkpoint information.
const ObjectSample* current = object_sampler->first();
while (current != NULL) {
ObjectSample* prev = current->prev();
if (current->is_alive_and_older_than(last_sweep)) {
write_event(current, edge_store);
link_sample_with_edge(current, edge_store);
++count;
}
current = prev;
}
if (count > 0) {
// serialize associated checkpoints and potential chains
// We need to serialize the associated checkpoints and potential chains
// before writing the events to ensure constants are available for resolution
// at the time old object sample events appear in the stream.
ObjectSampleCheckpoint::write(object_sampler, edge_store, emit_all, _thread);
// Now we are ready to write the events
const ObjectSample* current = object_sampler->first();
while (current != NULL) {
ObjectSample* prev = current->prev();
if (current->is_alive_and_older_than(last_sweep)) {
write_event(current, edge_store);
}
current = prev;
}
}
return count;
}
@ -107,6 +115,22 @@ static int array_size(const oop object) {
return min_jint;
}
void EventEmitter::link_sample_with_edge(const ObjectSample* sample, EdgeStore* edge_store) {
assert(sample != NULL, "invariant");
assert(!sample->is_dead(), "invariant");
assert(edge_store != NULL, "invariant");
if (SafepointSynchronize::is_at_safepoint()) {
if (!sample->object()->mark().is_marked()) {
// Associated with an edge (chain) already during heap traversal.
return;
}
}
// In order to dump out a representation of the event
// even though the sample object was found not reachable / too long to reach,
// we need to register a top level edge.
edge_store->put(UnifiedOopRef::encode_in_native(sample->object_addr()));
}
void EventEmitter::write_event(const ObjectSample* sample, EdgeStore* edge_store) {
assert(sample != NULL, "invariant");
assert(!sample->is_dead(), "invariant");
@ -121,14 +145,10 @@ void EventEmitter::write_event(const ObjectSample* sample, EdgeStore* edge_store
}
}
if (edge == NULL) {
// In order to dump out a representation of the event
// even though it was not reachable / too long to reach,
// we need to register a top level edge for this object.
edge = edge_store->put(UnifiedOopRef::encode_in_native(sample->object_addr()));
edge = edge_store->get(UnifiedOopRef::encode_in_native(sample->object_addr()));
} else {
gc_root_id = edge_store->gc_root_id(edge);
}
assert(edge != NULL, "invariant");
const traceid object_id = edge_store->get_id(edge);
assert(object_id != 0, "invariant");

View File

@ -51,6 +51,7 @@ class EventEmitter : public CHeapObj<mtTracing> {
void write_event(const ObjectSample* sample, EdgeStore* edge_store);
size_t write_events(ObjectSampler* sampler, EdgeStore* store, bool emit_all);
void link_sample_with_edge(const ObjectSample* sample, EdgeStore* edge_store);
static void emit(ObjectSampler* sampler, int64_t cutoff_ticks, bool emit_all, bool skip_bfs);
};

View File

@ -259,7 +259,6 @@ static void install_stack_traces(const ObjectSampler* sampler, JfrStackTraceRepo
}
void ObjectSampleCheckpoint::on_rotation(const ObjectSampler* sampler, JfrStackTraceRepository& stack_trace_repo) {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(sampler != NULL, "invariant");
assert(LeakProfiler::is_running(), "invariant");
Thread* const thread = Thread::current();
@ -372,7 +371,6 @@ static void write_sample_blobs(const ObjectSampler* sampler, bool emit_all, Thre
}
void ObjectSampleCheckpoint::write(const ObjectSampler* sampler, EdgeStore* edge_store, bool emit_all, Thread* thread) {
assert_locked_or_safepoint(JfrStream_lock);
assert(sampler != NULL, "invariant");
assert(edge_store != NULL, "invariant");
assert(thread != NULL, "invariant");

View File

@ -33,7 +33,6 @@
#include "logging/log.hpp"
#include "runtime/atomic.hpp"
#include "runtime/globals.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/thread.inline.hpp"
@ -428,10 +427,6 @@ static bool prepare_for_emergency_dump(Thread* thread) {
// need WatcherThread as a safeguard against potential deadlocks
return false;
}
if (JfrStream_lock->owned_by_self()) {
// crashed during jfr rotation, disallow recursion
return false;
}
#ifdef ASSERT
Mutex* owned_lock = thread->owned_locks();

View File

@ -57,6 +57,70 @@
// incremented on each flushpoint
static u8 flushpoint_id = 0;
class JfrRotationLock : public StackObj {
private:
static const Thread* _owner_thread;
static const int retry_wait_millis;
static volatile int _lock;
Thread* _thread;
bool _recursive;
static bool acquire(Thread* thread) {
if (Atomic::cmpxchg(&_lock, 0, 1) == 0) {
assert(_owner_thread == NULL, "invariant");
_owner_thread = thread;
return true;
}
return false;
}
// The system can proceed to a safepoint
// because even if the thread is a JavaThread,
// it is running as _thread_in_native here.
void lock() {
while (!acquire(_thread)) {
os::naked_short_sleep(retry_wait_millis);
}
assert(is_owner(), "invariant");
}
public:
JfrRotationLock() : _thread(Thread::current()), _recursive(false) {
assert(_thread != NULL, "invariant");
if (_thread == _owner_thread) {
// Recursive case is not supported.
_recursive = true;
assert(_lock == 1, "invariant");
// For user, should not be "jfr, system".
log_info(jfr)("Unable to issue rotation due to recursive calls.");
return;
}
lock();
}
~JfrRotationLock() {
assert(is_owner(), "invariant");
if (_recursive) {
return;
}
_owner_thread = NULL;
OrderAccess::storestore();
_lock = 0;
}
static bool is_owner() {
return _owner_thread == Thread::current();
}
bool is_acquired_recursively() const {
return _recursive;
}
};
const Thread* JfrRotationLock::_owner_thread = NULL;
const int JfrRotationLock::retry_wait_millis = 10;
volatile int JfrRotationLock::_lock = 0;
template <typename Instance, size_t(Instance::*func)()>
class Content {
private:
@ -341,11 +405,13 @@ static void set_recorder_state(RecorderState from, RecorderState to) {
}
static void start_recorder() {
assert(JfrRotationLock::is_owner(), "invariant");
set_recorder_state(STOPPED, RUNNING);
log_debug(jfr, system)("Recording service STARTED");
}
static void stop_recorder() {
assert(JfrRotationLock::is_owner(), "invariant");
set_recorder_state(RUNNING, STOPPED);
log_debug(jfr, system)("Recording service STOPPED");
}
@ -357,7 +423,7 @@ bool JfrRecorderService::is_recording() {
}
void JfrRecorderService::start() {
MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
JfrRotationLock lock;
assert(!is_recording(), "invariant");
clear();
open_new_chunk();
@ -372,6 +438,7 @@ static void stop() {
}
void JfrRecorderService::clear() {
assert(JfrRotationLock::is_owner(), "invariant");
pre_safepoint_clear();
invoke_safepoint_clear();
post_safepoint_clear();
@ -404,7 +471,7 @@ void JfrRecorderService::post_safepoint_clear() {
}
void JfrRecorderService::open_new_chunk(bool vm_error) {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(JfrRotationLock::is_owner(), "invariant");
JfrChunkRotation::on_rotation();
const bool valid_chunk = _repository.open_chunk(vm_error);
_storage.control().set_to_disk(valid_chunk);
@ -413,54 +480,14 @@ void JfrRecorderService::open_new_chunk(bool vm_error) {
}
}
// 'rotation_safepoint_pending' is currently only relevant in the unusual case of an emergency dump.
// Since the JfrStream_lock must be acquired using _no_safepoint_check,
// if the thread running the emergency dump is a JavaThread, a pending safepoint, induced by rotation,
// would lead to a deadlock. This deadlock, although unpleasant, is not completely horrendous at this
// location because the WatcherThread will terminate the VM after a timeout.
// Deadlock avoidance is done not to affect the stability of general VM error reporting.
static bool rotation_safepoint_pending = false;
static bool is_rotation_safepoint_pending() {
return Atomic::load_acquire(&rotation_safepoint_pending);
}
static void set_rotation_safepoint_pending(bool value) {
assert(value ? !is_rotation_safepoint_pending() : is_rotation_safepoint_pending(), "invariant");
Atomic::release_store(&rotation_safepoint_pending, value);
}
static bool vm_error = false;
static const Thread* vm_error_thread = NULL;
static bool prepare_for_vm_error_rotation() {
assert(!JfrStream_lock->owned_by_self(), "invariant");
Thread* const t = Thread::current();
assert(t != NULL, "invariant");
if (is_rotation_safepoint_pending() && t->is_Java_thread()) {
// A safepoint is pending, avoid deadlock.
log_warning(jfr, system)("Unable to issue successful emergency dump");
return false;
}
vm_error_thread = t;
vm_error = true;
OrderAccess::fence();
return true;
}
void JfrRecorderService::vm_error_rotation() {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(vm_error, "invariant");
Thread* const t = Thread::current();
if (vm_error_thread != t) {
return;
}
assert(vm_error_thread == t, "invariant");
assert(JfrRotationLock::is_owner(), "invariant");
if (!_chunkwriter.is_valid()) {
open_new_chunk(true);
}
if (_chunkwriter.is_valid()) {
_storage.flush_regular_buffer(t->jfr_thread_local()->native_buffer(), t);
Thread* const thread = Thread::current();
_storage.flush_regular_buffer(thread->jfr_thread_local()->native_buffer(), thread);
_chunkwriter.mark_chunk_final();
invoke_flush();
_chunkwriter.set_time_stamp();
@ -471,16 +498,11 @@ void JfrRecorderService::vm_error_rotation() {
}
void JfrRecorderService::rotate(int msgs) {
assert(!JfrStream_lock->owned_by_self(), "invariant");
DEBUG_ONLY(JfrJavaSupport::check_java_thread_in_native(Thread::current()));
if (msgs & MSGBIT(MSG_VM_ERROR)) {
// emergency dump
if (!prepare_for_vm_error_rotation()) {
return;
}
JfrRotationLock lock;
if (lock.is_acquired_recursively()) {
return;
}
MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
if (vm_error) {
if (msgs & MSGBIT(MSG_VM_ERROR)) {
vm_error_rotation();
return;
}
@ -495,7 +517,7 @@ void JfrRecorderService::rotate(int msgs) {
}
void JfrRecorderService::in_memory_rotation() {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(JfrRotationLock::is_owner(), "invariant");
// currently running an in-memory recording
assert(!_storage.control().to_disk(), "invariant");
open_new_chunk();
@ -506,17 +528,14 @@ void JfrRecorderService::in_memory_rotation() {
}
void JfrRecorderService::chunk_rotation() {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(JfrRotationLock::is_owner(), "invariant");
finalize_current_chunk();
open_new_chunk();
}
void JfrRecorderService::finalize_current_chunk() {
assert(_chunkwriter.is_valid(), "invariant");
assert(!is_rotation_safepoint_pending(), "invariant");
set_rotation_safepoint_pending(true);
write();
assert(!is_rotation_safepoint_pending(), "invariant");
}
void JfrRecorderService::write() {
@ -527,7 +546,6 @@ void JfrRecorderService::write() {
void JfrRecorderService::pre_safepoint_write() {
assert(_chunkwriter.is_valid(), "invariant");
assert(is_rotation_safepoint_pending(), "invariant");
if (LeakProfiler::is_running()) {
// Exclusive access to the object sampler instance.
// The sampler is released (unlocked) later in post_safepoint_write.
@ -551,8 +569,6 @@ void JfrRecorderService::invoke_safepoint_write() {
void JfrRecorderService::safepoint_write() {
assert(SafepointSynchronize::is_at_safepoint(), "invariant");
assert(is_rotation_safepoint_pending(), "invariant");
set_rotation_safepoint_pending(false);
_checkpoint_manager.begin_epoch_shift();
if (_string_pool.is_modified()) {
write_stringpool_safepoint(_string_pool, _chunkwriter);
@ -566,7 +582,6 @@ void JfrRecorderService::safepoint_write() {
void JfrRecorderService::post_safepoint_write() {
assert(_chunkwriter.is_valid(), "invariant");
assert(!is_rotation_safepoint_pending(), "invariant");
// During the safepoint tasks just completed, the system transitioned to a new epoch.
// Type tagging is epoch relative which entails we are able to write out the
// already tagged artifacts for the previous epoch. We can accomplish this concurrently
@ -603,12 +618,10 @@ static void write_thread_local_buffer(JfrChunkWriter& chunkwriter, Thread* t) {
assert(buffer != NULL, "invariant");
if (!buffer->empty()) {
chunkwriter.write_unbuffered(buffer->top(), buffer->pos() - buffer->top());
reset_buffer(buffer, t);
}
}
size_t JfrRecorderService::flush() {
assert(JfrStream_lock->owned_by_self(), "invariant");
size_t total_elements = flush_metadata(_chunkwriter);
const size_t storage_elements = flush_storage(_storage, _chunkwriter);
if (0 == storage_elements) {
@ -628,7 +641,7 @@ typedef Content<JfrRecorderService, &JfrRecorderService::flush> FlushFunctor;
typedef WriteContent<FlushFunctor> Flush;
void JfrRecorderService::invoke_flush() {
assert(JfrStream_lock->owned_by_self(), "invariant");
assert(JfrRotationLock::is_owner(), "invariant");
assert(_chunkwriter.is_valid(), "invariant");
Thread* const t = Thread::current();
++flushpoint_id;
@ -642,7 +655,7 @@ void JfrRecorderService::invoke_flush() {
void JfrRecorderService::flushpoint() {
DEBUG_ONLY(JfrJavaSupport::check_java_thread_in_native(Thread::current()));
MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
JfrRotationLock lock;
if (_chunkwriter.is_valid()) {
invoke_flush();
}
@ -650,6 +663,7 @@ void JfrRecorderService::flushpoint() {
void JfrRecorderService::process_full_buffers() {
DEBUG_ONLY(JfrJavaSupport::check_java_thread_in_native(Thread::current()));
JfrRotationLock lock;
if (_chunkwriter.is_valid()) {
_storage.write_full();
}