diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp
index a88a84eb9f8..58b9c58a329 100644
--- a/src/hotspot/share/runtime/lockStack.cpp
+++ b/src/hotspot/share/runtime/lockStack.cpp
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
- * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -118,7 +118,4 @@ void LockStack::print_on(outputStream* st) {
 OMCache::OMCache(JavaThread* jt) : _entries() {
   STATIC_ASSERT(std::is_standard_layout<OMCache>::value);
   STATIC_ASSERT(std::is_standard_layout<OMCache::OMCacheEntry>::value);
-  STATIC_ASSERT(offsetof(OMCache, _null_sentinel) == offsetof(OMCache, _entries) +
-                offsetof(OMCache::OMCacheEntry, _oop) +
-                OMCache::CAPACITY * in_bytes(oop_to_oop_difference()));
 }
diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp
index 199f7cb91f2..a32364b1774 100644
--- a/src/hotspot/share/runtime/lockStack.hpp
+++ b/src/hotspot/share/runtime/lockStack.hpp
@@ -139,7 +139,6 @@ class OMCache {
     oop _oop = nullptr;
     ObjectMonitor* _monitor = nullptr;
   } _entries[CAPACITY];
-  const oop _null_sentinel = nullptr;
 
 public:
   static ByteSize entries_offset() { return byte_offset_of(OMCache, _entries); }
diff --git a/src/hotspot/share/runtime/objectMonitorTable.cpp b/src/hotspot/share/runtime/objectMonitorTable.cpp
index bf046f90655..443e3a3a734 100644
--- a/src/hotspot/share/runtime/objectMonitorTable.cpp
+++ b/src/hotspot/share/runtime/objectMonitorTable.cpp
@@ -34,6 +34,88 @@
 #include "utilities/concurrentHashTableTasks.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
+// -----------------------------------------------------------------------------
+// Theory of operations -- Object Monitor Table:
+//
+// The OMT (Object Monitor Table) is a concurrent hash table specifically
+// designed so that it (in the normal case) can be searched from C2-generated
+// code.
+//
+// In its simplest form it consists of:
+// 1. An array of pointers.
+// 2. The size (capacity) of the array, which is always a power of two.
+//
+// When you want to find the monitor associated with an object, you extract
+// the hash value of the object, then calculate an index by bit-wise ANDing
+// the hash value with the capacity mask of the OMT. Now use that index into
+// the OMT's array of pointers. If the pointer is non-null, check if it's a
+// monitor pointer that is associated with the object. If so, you're done. If
+// the pointer is non-null but associated with another object, you keep
+// looking at (index+1), (index+2) and so on, until you either find the
+// correct monitor, or a null pointer. Finding a null pointer means that the
+// monitor is simply not in the OMT.
+//
+// If the size of the pointer array is significantly larger than the number of
+// pointers in it, the chance of finding the monitor at the hash index
+// (without any further linear searching) is quite high. It is also
+// straightforward to generate C2 code for this, which for the fast path
+// doesn't contain any branching at all. See: C2_MacroAssembler::fast_lock().
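+//
+// As a sketch, the lookup described above boils down to the following
+// (illustrative code only, not the actual OMT implementation; the real
+// thing also has to deal with the "tombstone" and "removed" values
+// described below, and with memory ordering):
+//
+//   ObjectMonitor* lookup(oop obj, intptr_t hash) {
+//     size_t index = size_t(hash) & _capacity_mask; // capacity is a power of two
+//     for (;;) {
+//       ObjectMonitor* m = _buckets[index];
+//       if (m == nullptr) {
+//         return nullptr;                           // not in the OMT
+//       }
+//       if (m->object_peek() == obj) {
+//         return m;                                 // found the monitor
+//       }
+//       index = (index + 1) & _capacity_mask;       // linear probing
+//     }
+//   }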
+//
+// When the number of monitors (pointers in the array) exceeds the allowed
+// limit (defined by the GROW_LOAD_FACTOR symbol), we need to grow the table.
+//
+// A simple description of how growing the OMT is done is to say that we
+// allocate a new table (twice as large as the old one), and then copy all the
+// old monitor pointers from the old table to the new one.
+//
+// But since the OMT is a concurrent hash table and things need to keep
+// working for other clients of the OMT while we grow it, it gets a bit more
+// complicated.
+//
+// Both the new and (potentially several) old table(s) may exist at the same
+// time. The newest is always called the "current" table, and the older ones
+// are singly linked using a "prev" pointer.
+//
+// As soon as we have allocated and linked in the new "current" OMT, all new
+// monitor pointers will be added to the new table, effectively making the
+// atomic switch from "old current" to "new current" a linearization point.
+//
+// After that we start to go through all the indexes in the old table. If the
+// index is empty (the pointer is null) we put a "tombstone" into that index,
+// which will prevent any future concurrent insert from ending up in that
+// index.
+//
+// If the index contains a monitor pointer, we insert that monitor pointer
+// into the OMT that is one generation newer. If the index contains a
+// "removed" pointer, we just ignore it.
+//
+// We use special pointer values for "tombstone" and "removed". Any pointer
+// that is not null, not a tombstone and not removed, is considered to be a
+// pointer to a monitor.
+//
+// When all the monitor pointers from an old OMT have been transferred to the
+// new OMT, the old table is unlinked.
+//
+// This copying from an old OMT to the OMT one generation newer continues
+// until all the monitor pointers from old OMTs have been transferred to the
+// newest "current" OMT.
+//
+// The memory for old, unlinked OMTs will be freed after a handshake with all
+// Java threads.
+//
+// Searching the OMT for a monitor pointer while there are several generations
+// of the OMT will start from the oldest OMT.
+//
+// A note about the GROW_LOAD_FACTOR: In order to guarantee that the add and
+// search algorithms can't loop forever, we must make sure that there is at
+// least one null pointer in the array to stop them. Furthermore, when we grow
+// the OMT, we must make sure that the new "current" can accommodate all the
+// monitors from all older OMTs, while still being so sparsely populated that
+// the C2-generated code will likely find what it's searching for at the hash
+// index, without needing further linear searching. The grow load factor is
+// set to 12.5%, which satisfies the above requirements. Don't change it for
+// fun, it might backfire.
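+//
+// As a worked example of that factor (the numbers are only illustrative):
+// with a capacity of 1024, the table is grown once it holds
+// 1024 * 12.5% = 128 monitor pointers. The new "current" has capacity 2048,
+// so immediately after every pointer from the older generations has been
+// transferred (ignoring concurrent inserts) it is only 128 / 2048 = 6.25%
+// full. That both guarantees plenty of null pointers to terminate probing,
+// and makes it likely that a lookup succeeds directly at the hash index.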
+
 // -----------------------------------------------------------------------------
 // ConcurrentHashTable storing links from objects to ObjectMonitors
 
@@ -42,13 +124,13 @@ ObjectMonitorTable::Table* volatile ObjectMonitorTable::_curr;
 
 class ObjectMonitorTable::Table : public CHeapObj {
   friend class ObjectMonitorTable;
+  DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0);
   const size_t _capacity_mask;       // One less than its power-of-two capacity
   Table* volatile _prev;             // Set while rehashing
   ObjectMonitor* volatile* _buckets; // The payload
-
-  char _padding[DEFAULT_CACHE_LINE_SIZE];
-
+  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(_capacity_mask) + sizeof(_prev) + sizeof(_buckets));
   volatile size_t _items_count;
+  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(_items_count));
 
   static ObjectMonitor* tombstone() {
     return (ObjectMonitor*)ObjectMonitorTable::SpecialPointerValues::tombstone;
@@ -115,7 +197,7 @@ public:
     return should_grow(AtomicAccess::load(&_items_count));
   }
 
-  ObjectMonitor* get(oop obj, int hash) {
+  ObjectMonitor* get(oop obj, intptr_t hash) {
     // Acquire tombstones and relocations in case prev transitioned to null
     Table* prev = AtomicAccess::load_acquire(&_prev);
     if (prev != nullptr) {
@@ -130,7 +212,7 @@
 
     for (;;) {
       ObjectMonitor* volatile* bucket = _buckets + index;
-      ObjectMonitor* monitor = AtomicAccess::load(bucket);
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
 
       if (monitor == tombstone() || monitor == nullptr) {
         // Not found
@@ -139,27 +221,23 @@
 
       if (monitor != removed_entry() && monitor->object_peek() == obj) {
         // Found matching monitor.
-        OrderAccess::acquire();
         return monitor;
       }
 
       index = (index + 1) & _capacity_mask;
-      if (index == start_index) {
-        // Not found - wrap around.
-        break;
-      }
+      assert(index != start_index, "invariant");
     }
 
-    // Rehashing could have stareted by now, but if a monitor has been inserted in a
+    // Rehashing could have started by now, but if a monitor has been inserted in a
     // newer table, it was inserted after the get linearization point.
     return nullptr;
   }
 
-  ObjectMonitor* get_set(oop obj, ObjectMonitor* new_monitor, int hash) {
-    // Acquire any tombstones and relocations if prev transitioned to null.
+  ObjectMonitor* prepare_insert(oop obj, ObjectMonitor* new_monitor, intptr_t hash) {
+    // Acquire any tombstones and relocations if prev transitioned to null.
     Table* prev = AtomicAccess::load_acquire(&_prev);
     if (prev != nullptr) {
-      ObjectMonitor* result = prev->get_set(obj, new_monitor, hash);
+      ObjectMonitor* result = prev->prepare_insert(obj, new_monitor, hash);
       if (result != nullptr) {
         return result;
       }
@@ -170,10 +248,56 @@ public:
 
     for (;;) {
       ObjectMonitor* volatile* bucket = _buckets + index;
-      ObjectMonitor* monitor = AtomicAccess::load(bucket);
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
 
       if (monitor == nullptr) {
-        // Empty slot to install the new monitor.
+        // Found an empty slot to install the new monitor in.
+        // To avoid concurrent inserts succeeding, place a tombstone here.
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tombstone(), memory_order_relaxed);
+        if (result == monitor) {
+          // Success! Nobody will try to insert here again, except reinsert from rehashing.
+          return nullptr;
+        }
+        monitor = result;
+      }
+
+      if (monitor == tombstone()) {
+        // Can't insert into this table.
+        return nullptr;
+      }
+
+      if (monitor != removed_entry() && monitor->object_peek() == obj) {
+        // Found matching monitor.
+        return monitor;
+      }
+
+      index = (index + 1) & _capacity_mask;
+      assert(index != start_index, "invariant");
+    }
+  }
+
+  ObjectMonitor* get_set(oop obj, ObjectMonitor* new_monitor, intptr_t hash) {
+    // Acquire any tombstones and relocations if prev transitioned to null.
+    Table* prev = AtomicAccess::load_acquire(&_prev);
+    if (prev != nullptr) {
+      // Sprinkle tombstones in previous tables to force concurrent inserters
+      // to the latest table. We only really want to try inserting in the
+      // latest table.
+      ObjectMonitor* result = prev->prepare_insert(obj, new_monitor, hash);
+      if (result != nullptr) {
+        return result;
+      }
+    }
+
+    const size_t start_index = size_t(hash) & _capacity_mask;
+    size_t index = start_index;
+
+    for (;;) {
+      ObjectMonitor* volatile* bucket = _buckets + index;
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
+
+      if (monitor == nullptr) {
+        // Empty slot to install the new monitor.
         if (try_inc_items_count()) {
           // Succeeding in claiming an item.
           ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release);
@@ -188,7 +312,7 @@
         } else {
           // Out of allowance; leaving place for rehashing to succeed.
           // To avoid concurrent inserts succeeding, place a tombstone here.
-          ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tombstone());
+          ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tombstone(), memory_order_acq_rel);
           if (result == monitor) {
             // Success; nobody will try to insert here again, except reinsert from rehashing.
             return nullptr;
@@ -208,59 +332,62 @@
       }
 
       index = (index + 1) & _capacity_mask;
-      if (index == start_index) {
-        // No slot to install in this table.
-        return nullptr;
-      }
+      assert(index != start_index, "invariant");
     }
   }
 
-  void remove(oop obj, ObjectMonitor* old_monitor, int hash) {
-    // Acquire any tombstones and relocations if prev transitioned to null.
-    Table* prev = AtomicAccess::load_acquire(&_prev);
-    if (prev != nullptr) {
-      prev->remove(obj, old_monitor, hash);
-    }
-
+  void remove(oop obj, ObjectMonitor* old_monitor, intptr_t hash) {
     const size_t start_index = size_t(hash) & _capacity_mask;
     size_t index = start_index;
 
     for (;;) {
      ObjectMonitor* volatile* bucket = _buckets + index;
-      ObjectMonitor* monitor = AtomicAccess::load(bucket);
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
 
      if (monitor == nullptr) {
-        // Monitor does not exist in this table.
-        return;
+        // The monitor does not exist in this table.
+        break;
+      }
+
+      if (monitor == tombstone()) {
+        // Stop searching at tombstones.
+        break;
      }
 
      if (monitor == old_monitor) {
-        // Found matching entry; remove it.
-        AtomicAccess::cmpxchg(bucket, monitor, removed_entry());
-        return;
+        // Found matching entry; remove it.
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, removed_entry(), memory_order_relaxed);
+        assert(result == monitor, "should not fail");
+        break;
      }
 
      index = (index + 1) & _capacity_mask;
-      if (index == start_index) {
-        // Not found
-        return;
-      }
+      assert(index != start_index, "invariant");
+    }
+
+    // Old versions are removed after newer versions to ensure that observing
+    // the monitor removed and then doing a subsequent lookup results in there
+    // still not being a monitor, instead of flickering back to being there.
+    // Only the deflation thread rebuilds and unlinks tables, so we do not need
+    // any concurrency-safe prev read below.
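+    //
+    // A sketch of that ordering (two generations, "old" and "new"; lookups
+    // search old before new):
+    //
+    //   remove from new:  a concurrent lookup still finds the entry in old
+    //   remove from old:  lookups now miss in both tables, for good
+    //
+    // so a reader can never observe the monitor disappear and then reappear.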
+    if (_prev != nullptr) {
+      _prev->remove(obj, old_monitor, hash);
     }
   }
 
   void reinsert(oop obj, ObjectMonitor* new_monitor) {
-    int hash = obj->mark().hash();
+    intptr_t hash = obj->mark().hash();
     const size_t start_index = size_t(hash) & _capacity_mask;
     size_t index = start_index;
 
     for (;;) {
       ObjectMonitor* volatile* bucket = _buckets + index;
-      ObjectMonitor* monitor = AtomicAccess::load(bucket);
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
 
       if (monitor == nullptr) {
         // Empty slot to install the new monitor.
-        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release);
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_acq_rel);
         if (result == monitor) {
           // Success - unconditionally increment.
           inc_items_count();
@@ -274,7 +401,7 @@
       if (monitor == tombstone()) {
         // A concurrent inserter did not get enough allowance in the table.
         // But reinsert always succeeds - we will take the spot.
-        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release);
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_acq_rel);
         if (result == monitor) {
           // Success - unconditionally increment.
           inc_items_count();
@@ -285,12 +412,25 @@
         monitor = result;
       }
 
+      if (monitor == removed_entry()) {
+        // A removed entry can be flipped back with reinsertion.
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, new_monitor, memory_order_release);
+        if (result == monitor) {
+          // Success - but don't increment; the initial entry did that for us.
+          return;
+        }
+
+        // Another monitor was installed.
+        monitor = result;
+      }
+
       assert(monitor != nullptr, "invariant");
       assert(monitor != tombstone(), "invariant");
-      assert(monitor == removed_entry() || monitor->object_peek() != obj, "invariant");
+      assert(monitor != removed_entry(), "invariant");
+      assert(monitor->object_peek() != obj, "invariant");
 
       index = (index + 1) & _capacity_mask;
-      assert(index != start_index, "should never be full");
+      assert(index != start_index, "invariant");
     }
   }
 
@@ -306,19 +446,19 @@ public:
 
     JavaThread* current = JavaThread::current();
 
-    // Relocate entries from prev after
+    // Relocate entries from prev.
     for (size_t index = 0; index <= prev->_capacity_mask; index++) {
-      if ((index & 128) == 0) {
+      if ((index & 127) == 0) {
         // Poll for safepoints to improve time to safepoint
         ThreadBlockInVM tbivm(current);
      }
 
      ObjectMonitor* volatile* bucket = prev->_buckets + index;
-      ObjectMonitor* monitor = AtomicAccess::load(bucket);
+      ObjectMonitor* monitor = AtomicAccess::load_acquire(bucket);
 
      if (monitor == nullptr) {
        // Empty slot; put a tombstone there.
-        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tombstone(), memory_order_relaxed);
+        ObjectMonitor* result = AtomicAccess::cmpxchg(bucket, monitor, tombstone(), memory_order_acq_rel);
        if (result == nullptr) {
          // Success; move to next entry.
          continue;
@@ -331,7 +471,11 @@
      if (monitor != tombstone() && monitor != removed_entry()) {
        // A monitor
        oop obj = monitor->object_peek();
-        if (!monitor->is_being_async_deflated() && obj != nullptr) {
+        if (obj != nullptr) {
+          // In the current implementation the deflation thread drives
+          // the rebuilding, and it will already have removed any entry
+          // it has deflated. The assert is only here to make sure.
+          assert(!monitor->is_being_async_deflated(), "Should be");
          // Re-insert still live monitor.
          reinsert(obj, monitor);
        }
@@ -348,23 +492,23 @@ void ObjectMonitorTable::create() {
 }
 
 ObjectMonitor* ObjectMonitorTable::monitor_get(Thread* current, oop obj) {
-  const int hash = obj->mark().hash();
+  const intptr_t hash = obj->mark().hash();
   Table* curr = AtomicAccess::load_acquire(&_curr);
-
-  return curr->get(obj, hash);
+  ObjectMonitor* monitor = curr->get(obj, hash);
+  OrderAccess::acquire();
+  return monitor;
 }
 
 // Returns a new table to try inserting into.
-ObjectMonitorTable::Table *ObjectMonitorTable::grow_table(Table *curr) {
-  Table *new_table = AtomicAccess::load(&_curr);
+ObjectMonitorTable::Table* ObjectMonitorTable::grow_table(Table* curr) {
+  Table* new_table = AtomicAccess::load(&_curr);
   if (new_table != curr) {
     // Table changed; no need to try further
     return new_table;
   }
 
   new_table = new Table(curr->capacity() << 1, curr);
-  Table *result =
-      AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_acq_rel);
+  Table* result = AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_acq_rel);
   if (result == curr) {
     // Successfully started rehashing.
     log_info(monitorinflation)("Growing object monitor table");
@@ -379,7 +523,7 @@
 }
 
 ObjectMonitor* ObjectMonitorTable::monitor_put_get(Thread* current, ObjectMonitor* monitor, oop obj) {
-  const int hash = obj->mark().hash();
+  const intptr_t hash = obj->mark().hash();
   Table* curr = AtomicAccess::load_acquire(&_curr);
 
   for (;;) {
@@ -387,8 +531,8 @@ ObjectMonitor* ObjectMonitorTable::monitor_put_get(Thread* current, ObjectMonito
     ObjectMonitor* result = curr->get_set(obj, monitor, hash);
     if (result != nullptr) {
       return result;
-      // Table rehashing started; try again in the new table
     }
+    // The table's limit was reached; we need to grow it.
     curr = grow_table(curr);
   }
 }
@@ -399,10 +543,11 @@ void ObjectMonitorTable::remove_monitor_entry(Thread* current, ObjectMonitor* mo
     // Defer removal until subsequent rebuilding.
     return;
   }
-  const int hash = obj->mark().hash();
+  const intptr_t hash = obj->mark().hash();
   Table* curr = AtomicAccess::load_acquire(&_curr);
 
   curr->remove(obj, monitor, hash);
+  assert(monitor_get(current, obj) != monitor, "should have been removed");
 }
 
 // Before handshake; rehash and unlink tables.
@@ -411,9 +556,11 @@ void ObjectMonitorTable::rebuild(GrowableArray<Table*>* delete_list) {
   {
     Table* curr = AtomicAccess::load_acquire(&_curr);
     new_table = new Table(curr->capacity(), curr);
-    Table* result = AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_release);
+    Table* result = AtomicAccess::cmpxchg(&_curr, curr, new_table, memory_order_acq_rel);
    if (result != curr) {
-      // Somebody else racingly started rehashing; try again.
+      // Somebody else racingly started rehashing. Delete the
+      // new_table and treat somebody else's table as the new one.
+      delete new_table;
      new_table = result;
    }
  }