8273626: G1: Factor out concurrent segmented array from G1CardSetAllocator

Reviewed-by: tschatzl, ayang
This commit is contained in:
Hamlin Li 2021-10-19 12:24:21 +00:00
parent a4491f2253
commit d17d81a8b2
8 changed files with 520 additions and 325 deletions

View File

@@ -65,6 +65,8 @@ G1CardSetConfiguration::G1CardSetConfiguration() :
_cards_in_howl_bitmap_threshold = _num_cards_in_howl_bitmap * (double)G1RemSetCoarsenHowlBitmapToHowlFullPercent / 100;
_bitmap_hash_mask = ~(~(0) << _log2_num_cards_in_howl_bitmap);
init_card_set_alloc_options();
log_configuration();
}
@@ -89,9 +91,23 @@ G1CardSetConfiguration::G1CardSetConfiguration(uint inline_ptr_bits_per_card,
_log2_num_cards_in_howl_bitmap = log2i_exact(_num_cards_in_howl_bitmap);
_bitmap_hash_mask = ~(~(0) << _log2_num_cards_in_howl_bitmap);
init_card_set_alloc_options();
log_configuration();
}
// Release the alloc-options array created in init_card_set_alloc_options().
// Use the element type that was actually allocated (G1CardSetAllocOptions),
// not size_t, so the macro's type argument matches the NEW_C_HEAP_ARRAY call.
// NOTE(review): no destructors run here; assumes G1CardSetAllocOptions stays
// trivially destructible -- confirm if it ever grows non-trivial members.
G1CardSetConfiguration::~G1CardSetConfiguration() {
  FREE_C_HEAP_ARRAY(G1CardSetAllocOptions, _card_set_alloc_options);
}
// Build the per-container-type allocation options used by the card set memory
// manager. One entry per memory object type (see num_mem_object_types()):
// index 0: hash table nodes, 1: arrays of cards, 2: howl bitmaps, 3: howl containers.
void G1CardSetConfiguration::init_card_set_alloc_options() {
// Raw C-heap array; elements are constructed in place via placement new below
// and released in the destructor.
_card_set_alloc_options = NEW_C_HEAP_ARRAY(G1CardSetAllocOptions, num_mem_object_types(), mtGC);
new (&_card_set_alloc_options[0]) G1CardSetAllocOptions((uint)CardSetHash::get_node_size());
// The larger container types use non-default initial (2) and maximum (256)
// buffer sizes.
new (&_card_set_alloc_options[1]) G1CardSetAllocOptions((uint)G1CardSetArray::size_in_bytes(_num_cards_in_array), 2, 256);
new (&_card_set_alloc_options[2]) G1CardSetAllocOptions((uint)G1CardSetBitMap::size_in_bytes(_num_cards_in_howl_bitmap), 2, 256);
new (&_card_set_alloc_options[3]) G1CardSetAllocOptions((uint)G1CardSetHowl::size_in_bytes(_num_buckets_in_howl), 2, 256);
}
void G1CardSetConfiguration::log_configuration() {
log_debug_p(gc, remset)("Card Set container configuration: "
"InlinePtr #elems %u size %zu "
@ -112,15 +128,8 @@ uint G1CardSetConfiguration::num_cards_in_inline_ptr(uint bits_per_card) {
return G1CardSetInlinePtr::max_cards_in_inline_ptr(bits_per_card);
}
G1CardSetAllocOptions* G1CardSetConfiguration::mem_object_alloc_options() {
G1CardSetAllocOptions* result = NEW_C_HEAP_ARRAY(G1CardSetAllocOptions, num_mem_object_types(), mtGC);
result[0] = { (uint)CardSetHash::get_node_size() };
result[1] = { (uint)G1CardSetArray::size_in_bytes(num_cards_in_array()), 2, 256 };
result[2] = { (uint)G1CardSetBitMap::size_in_bytes(num_cards_in_howl_bitmap()), 2, 256 };
result[3] = { (uint)G1CardSetHowl::size_in_bytes(num_buckets_in_howl()), 2, 256 };
return result;
// Returns the allocation options for the memory object type at the given
// index. The pointer refers into _card_set_alloc_options and is owned by this
// configuration; callers must not free it.
const G1CardSetAllocOptions* G1CardSetConfiguration::mem_object_alloc_options(uint idx) {
return &_card_set_alloc_options[idx];
}
const char* G1CardSetConfiguration::mem_object_type_name_str(uint index) {

View File

@ -32,7 +32,6 @@
#include "utilities/lockFreeStack.hpp"
class G1CardSetAllocOptions;
class G1CardSetBufferList;
class G1CardSetHashTable;
class G1CardSetHashTableValue;
class G1CardSetMemoryManager;
@ -60,6 +59,10 @@ class G1CardSetConfiguration {
uint _log2_num_cards_in_howl_bitmap;
size_t _bitmap_hash_mask;
G1CardSetAllocOptions* _card_set_alloc_options;
void init_card_set_alloc_options();
void log_configuration();
public:
@ -73,6 +76,8 @@ public:
double cards_in_howl_threshold,
uint max_cards_in_cardset);
~G1CardSetConfiguration();
// Inline pointer configuration
uint inline_ptr_bits_per_card() const { return _inline_ptr_bits_per_card; }
uint num_cards_in_inline_ptr() const;
@ -108,9 +113,8 @@ public:
// Number of distinctly sized memory objects on the card set heap.
// Currently contains CHT-Nodes, ArrayOfCards, BitMaps, Howl
static constexpr uint num_mem_object_types() { return 4; }
// Returns the memory allocation options for the memory objects on the card set heap. The returned
// array must be freed by the caller.
G1CardSetAllocOptions* mem_object_alloc_options();
// Returns the memory allocation options for the memory objects on the card set heap.
const G1CardSetAllocOptions* mem_object_alloc_options(uint idx);
// For a given memory object, get a descriptive name.
static const char* mem_object_type_name_str(uint index);

View File

@ -31,8 +31,6 @@
#include "utilities/growableArray.hpp"
#include "utilities/ticks.hpp"
class G1CardSetBuffer;
// Task handling deallocation of free card set memory.
class G1CardSetFreeMemoryTask : public G1ServiceTask {

View File

@ -30,99 +30,20 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/ostream.hpp"
G1CardSetBuffer::G1CardSetBuffer(uint elem_size, uint num_instances, G1CardSetBuffer* next) :
_elem_size(elem_size), _num_elems(num_instances), _next(next), _next_allocate(0) {
_buffer = NEW_C_HEAP_ARRAY(char, (size_t)_num_elems * elem_size, mtGCCardSet);
}
G1CardSetBuffer::~G1CardSetBuffer() {
FREE_C_HEAP_ARRAY(mtGCCardSet, _buffer);
}
void* G1CardSetBuffer::get_new_buffer_elem() {
if (_next_allocate >= _num_elems) {
return nullptr;
}
uint result = Atomic::fetch_and_add(&_next_allocate, 1u, memory_order_relaxed);
if (result >= _num_elems) {
return nullptr;
}
void* r = _buffer + (uint)result * _elem_size;
return r;
}
void G1CardSetBufferList::bulk_add(G1CardSetBuffer& first, G1CardSetBuffer& last, size_t num, size_t mem_size) {
_list.prepend(first, last);
Atomic::add(&_num_buffers, num, memory_order_relaxed);
Atomic::add(&_mem_size, mem_size, memory_order_relaxed);
}
void G1CardSetBufferList::print_on(outputStream* out, const char* prefix) {
out->print_cr("%s: buffers %zu size %zu", prefix, Atomic::load(&_num_buffers), Atomic::load(&_mem_size));
}
G1CardSetBuffer* G1CardSetBufferList::get() {
GlobalCounter::CriticalSection cs(Thread::current());
G1CardSetBuffer* result = _list.pop();
if (result != nullptr) {
Atomic::dec(&_num_buffers, memory_order_relaxed);
Atomic::sub(&_mem_size, result->mem_size(), memory_order_relaxed);
}
return result;
}
G1CardSetBuffer* G1CardSetBufferList::get_all(size_t& num_buffers, size_t& mem_size) {
GlobalCounter::CriticalSection cs(Thread::current());
G1CardSetBuffer* result = _list.pop_all();
num_buffers = Atomic::load(&_num_buffers);
mem_size = Atomic::load(&_mem_size);
if (result != nullptr) {
Atomic::sub(&_num_buffers, num_buffers, memory_order_relaxed);
Atomic::sub(&_mem_size, mem_size, memory_order_relaxed);
}
return result;
}
void G1CardSetBufferList::free_all() {
size_t num_freed = 0;
size_t mem_size_freed = 0;
G1CardSetBuffer* cur;
while ((cur = _list.pop()) != nullptr) {
mem_size_freed += cur->mem_size();
num_freed++;
delete cur;
}
Atomic::sub(&_num_buffers, num_freed, memory_order_relaxed);
Atomic::sub(&_mem_size, mem_size_freed, memory_order_relaxed);
}
template <class Elem>
G1CardSetAllocator<Elem>::G1CardSetAllocator(const char* name,
const G1CardSetAllocOptions& buffer_options,
const G1CardSetAllocOptions* buffer_options,
G1CardSetBufferList* free_buffer_list) :
_alloc_options(buffer_options),
_first(nullptr),
_last(nullptr),
_num_buffers(0),
_mem_size(0),
_free_buffer_list(free_buffer_list),
_segmented_array(name, buffer_options, free_buffer_list),
_transfer_lock(false),
_free_nodes_list(),
_pending_nodes_list(),
_num_pending_nodes(0),
_num_free_nodes(0),
_num_allocated_nodes(0),
_num_available_nodes(0)
_num_free_nodes(0)
{
assert(elem_size() >= sizeof(G1CardSetContainer), "Element instance size %u for allocator %s too small",
elem_size(), name);
assert(_free_buffer_list != nullptr, "precondition!");
uint elem_size = _segmented_array.elem_size();
assert(elem_size >= sizeof(G1CardSetContainer), "Element instance size %u for allocator %s too small", elem_size, name);
}
template <class Elem>
@ -164,7 +85,6 @@ bool G1CardSetAllocator<Elem>::try_transfer_pending() {
template <class Elem>
void G1CardSetAllocator<Elem>::free(Elem* elem) {
assert(elem != nullptr, "precondition");
assert(elem_size() >= sizeof(G1CardSetContainer), "size mismatch");
// Desired minimum transfer batch size. There is relatively little
// importance to the specific number. It shouldn't be too big, else
// we're wasting space when the release rate is low. If the release
@ -192,47 +112,27 @@ template <class Elem>
void G1CardSetAllocator<Elem>::drop_all() {
_free_nodes_list.pop_all();
_pending_nodes_list.pop_all();
G1CardSetBuffer* cur = Atomic::load_acquire(&_first);
if (cur != nullptr) {
assert(_last != nullptr, "If there is at least one element, there must be a last one.");
G1CardSetBuffer* first = cur;
#ifdef ASSERT
// Check list consistency.
G1CardSetBuffer* last = cur;
uint num_buffers = 0;
size_t mem_size = 0;
while (cur != nullptr) {
mem_size += cur->mem_size();
num_buffers++;
G1CardSetBuffer* next = cur->next();
last = cur;
cur = next;
}
#endif
assert(num_buffers == _num_buffers, "Buffer count inconsistent %u %u", num_buffers, _num_buffers);
assert(mem_size == _mem_size, "Memory size inconsistent");
assert(last == _last, "Inconsistent last element");
_free_buffer_list->bulk_add(*first, *_last, _num_buffers, _mem_size);
}
_first = nullptr;
_last = nullptr;
_num_available_nodes = 0;
_num_allocated_nodes = 0;
_num_pending_nodes = 0;
_num_buffers = 0;
_mem_size = 0;
_num_free_nodes = 0;
}
template <class Elem>
void G1CardSetAllocator<Elem>::print(outputStream* os) {
uint num_allocated_nodes = _segmented_array.num_allocated_nodes();
uint num_available_nodes = _segmented_array.num_available_nodes();
uint highest = _segmented_array.first_array_buffer() != nullptr
? _segmented_array.first_array_buffer()->num_elems()
: 0;
uint num_buffers = _segmented_array.num_buffers();
os->print("MA " PTR_FORMAT ": %u elems pending (allocated %u available %u) used %.3f highest %u buffers %u size %zu ",
p2i(this), _num_pending_nodes, _num_allocated_nodes, _num_available_nodes, percent_of(_num_allocated_nodes - _num_pending_nodes, _num_available_nodes), _first != nullptr ? _first->num_elems() : 0, _num_buffers, mem_size());
p2i(this),
_num_pending_nodes,
num_allocated_nodes,
num_available_nodes,
percent_of(num_allocated_nodes - _num_pending_nodes, num_available_nodes),
highest,
num_buffers,
mem_size());
}
G1CardSetMemoryStats::G1CardSetMemoryStats() {
@ -411,13 +311,11 @@ G1CardSetMemoryManager::G1CardSetMemoryManager(G1CardSetConfiguration* config,
_allocators = NEW_C_HEAP_ARRAY(G1CardSetAllocator<G1CardSetContainer>,
_config->num_mem_object_types(),
mtGC);
G1CardSetAllocOptions* alloc_options = _config->mem_object_alloc_options();
for (uint i = 0; i < num_mem_object_types(); i++) {
new (&_allocators[i]) G1CardSetAllocator<G1CardSetContainer>(_config->mem_object_type_name_str(i),
alloc_options[i],
_config->mem_object_alloc_options(i),
free_list_pool->free_list(i));
}
FREE_C_HEAP_ARRAY(size_t, alloc_options);
}
uint G1CardSetMemoryManager::num_mem_object_types() const {

View File

@ -27,6 +27,8 @@
#include "gc/g1/g1CardSet.hpp"
#include "gc/g1/g1CardSetContainers.hpp"
#include "gc/g1/g1CardSetContainers.inline.hpp"
#include "gc/g1/g1SegmentedArray.hpp"
#include "memory/allocation.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/lockFreeStack.hpp"
@ -36,130 +38,29 @@ class outputStream;
// Collects G1CardSetAllocator options/heuristics. Called by G1CardSetAllocator
// to determine the next size of the allocated G1CardSetBuffer.
class G1CardSetAllocOptions {
uint _elem_size;
uint _initial_num_elems;
// Defines a limit to the number of elements in the buffer
uint _max_num_elems;
uint exponential_expand(uint prev_num_elems) {
class G1CardSetAllocOptions : public G1SegmentedArrayAllocOptions {
uint exponential_expand(uint prev_num_elems) const {
return clamp(prev_num_elems * 2, _initial_num_elems, _max_num_elems);
}
public:
static const uint BufferAlignment = 8;
static const uint MinimumBufferSize = 8;
static const uint MaximumBufferSize = UINT_MAX / 2;
G1CardSetAllocOptions(uint elem_size, uint initial_num_elems = MinimumBufferSize, uint max_num_elems = MaximumBufferSize) :
_elem_size(align_up(elem_size, BufferAlignment)),
_initial_num_elems(initial_num_elems),
_max_num_elems(max_num_elems) {
G1SegmentedArrayAllocOptions(align_up(elem_size, BufferAlignment), initial_num_elems, max_num_elems, BufferAlignment) {
}
uint next_num_elems(uint prev_num_elems) {
virtual uint next_num_elems(uint prev_num_elems) const override {
return exponential_expand(prev_num_elems);
}
uint elem_size () const {return _elem_size;}
};
// A single buffer/arena containing _num_elems blocks of memory of _elem_size.
// G1CardSetBuffers can be linked together using a singly linked list.
class G1CardSetBuffer : public CHeapObj<mtGCCardSet> {
uint _elem_size;
uint _num_elems;
typedef G1SegmentedArrayBuffer<mtGCCardSet> G1CardSetBuffer;
G1CardSetBuffer* volatile _next;
char* _buffer; // Actual data.
// Index into the next free block to allocate into. Full if equal (or larger)
// to _num_elems (can be larger because we atomically increment this value and
// check only afterwards if the allocation has been successful).
uint volatile _next_allocate;
public:
G1CardSetBuffer(uint elem_size, uint num_elems, G1CardSetBuffer* next);
~G1CardSetBuffer();
G1CardSetBuffer* volatile* next_addr() { return &_next; }
void* get_new_buffer_elem();
uint num_elems() const { return _num_elems; }
G1CardSetBuffer* next() const { return _next; }
void set_next(G1CardSetBuffer* next) {
assert(next != this, " loop condition");
_next = next;
}
void reset(G1CardSetBuffer* next) {
_next_allocate = 0;
assert(next != this, " loop condition");
set_next(next);
memset((void*)_buffer, 0, (size_t)_num_elems * _elem_size);
}
uint elem_size() const { return _elem_size; }
size_t mem_size() const { return sizeof(*this) + (size_t)_num_elems * _elem_size; }
bool is_full() const { return _next_allocate >= _num_elems; }
};
// Set of (free) G1CardSetBuffers. The assumed usage is that allocation
// to it and removal of elements is strictly separate, but every action may be
// performed by multiple threads at the same time.
// Counts and memory usage are current on a best-effort basis if accessed concurrently.
class G1CardSetBufferList {
static G1CardSetBuffer* volatile* next_ptr(G1CardSetBuffer& node) {
return node.next_addr();
}
typedef LockFreeStack<G1CardSetBuffer, &next_ptr> NodeStack;
NodeStack _list;
volatile size_t _num_buffers;
volatile size_t _mem_size;
public:
G1CardSetBufferList() : _list(), _num_buffers(0), _mem_size(0) { }
~G1CardSetBufferList() { free_all(); }
void bulk_add(G1CardSetBuffer& first, G1CardSetBuffer& last, size_t num, size_t mem_size);
void add(G1CardSetBuffer& elem) { _list.prepend(elem); }
G1CardSetBuffer* get();
G1CardSetBuffer* get_all(size_t& num_buffers, size_t& mem_size);
// Give back all memory to the OS.
void free_all();
void print_on(outputStream* out, const char* prefix = "");
size_t num_buffers() const { return Atomic::load(&_num_buffers); }
size_t mem_size() const { return Atomic::load(&_mem_size); }
};
typedef G1SegmentedArrayBufferList<mtGCCardSet> G1CardSetBufferList;
// Arena-like allocator for (card set) heap memory objects (Elem elements).
//
// Actual allocation from the C heap occurs on G1CardSetBuffer basis, i.e. sets
// of elements. The assumed allocation pattern for these G1CardSetBuffer elements
// is assumed to be strictly two-phased:
//
// - in the first phase, G1CardSetBuffers are allocated from the C heap (or a free
// list given at initialization time). This allocation may occur in parallel. This
// typically corresponds to a single mutator phase, but may extend over multiple.
//
// - in the second phase, G1CardSetBuffers are given back in bulk to the free list.
// This is typically done during a GC pause.
//
// Some third party is responsible for giving back memory from the free list to
// the operating system.
//
// Allocation and deallocation in the first phase on G1CardSetContainer basis
// may occur by multiple threads at once.
//
@ -168,7 +69,7 @@ public:
// none, this class allocates a new G1CardSetBuffer (allocated from the C heap,
// asking the G1CardSetAllocOptions instance about sizes etc) and uses that one.
//
// The G1CardSetContainerOnHeaps free list is a linked list of G1CardSetContainers
// The NodeStack free list is a linked list of G1CardSetContainers
// within all G1CardSetBuffer instances allocated so far. It uses a separate
// pending list and global synchronization to avoid the ABA problem when the
// user frees a memory object.
@ -184,24 +85,13 @@ template <class Elem>
class G1CardSetAllocator {
// G1CardSetBuffer management.
// G1CardSetAllocOptions provides parameters for allocation buffer
// sizing and expansion.
G1CardSetAllocOptions _alloc_options;
G1CardSetBuffer* volatile _first; // The (start of the) list of all buffers.
G1CardSetBuffer* _last; // The last element of the list of all buffers.
volatile uint _num_buffers; // Number of assigned buffers to this allocator.
volatile size_t _mem_size; // Memory used by all buffers.
G1CardSetBufferList* _free_buffer_list; // The global free buffer list to
// preferentially get new buffers from.
typedef G1SegmentedArray<Elem, mtGCCardSet> SegmentedArray;
// G1CardSetContainer node management within the G1CardSetBuffers allocated
// by this allocator.
static G1CardSetContainer* volatile* next_ptr(G1CardSetContainer& node);
typedef LockFreeStack<G1CardSetContainer, &G1CardSetAllocator::next_ptr> NodeStack;
SegmentedArray _segmented_array;
volatile bool _transfer_lock;
NodeStack _free_nodes_list;
NodeStack _pending_nodes_list;
@ -209,9 +99,6 @@ class G1CardSetAllocator {
volatile uint _num_pending_nodes; // Number of nodes in the pending list.
volatile uint _num_free_nodes; // Number of nodes in the free list.
volatile uint _num_allocated_nodes; // Number of total nodes allocated and in use.
volatile uint _num_available_nodes; // Number of nodes available in all buffers (allocated + free + pending + not yet used).
// Try to transfer nodes from _pending_nodes_list to _free_nodes_list, with a
// synchronization delay for any in-progress pops from the _free_nodes_list
// to solve ABA here.
@ -219,13 +106,9 @@ class G1CardSetAllocator {
uint num_free_elems() const;
G1CardSetBuffer* create_new_buffer(G1CardSetBuffer* const prev);
uint elem_size() const { return _alloc_options.elem_size(); }
public:
G1CardSetAllocator(const char* name,
const G1CardSetAllocOptions& buffer_options,
const G1CardSetAllocOptions* buffer_options,
G1CardSetBufferList* free_buffer_list);
~G1CardSetAllocator() {
drop_all();
@ -238,17 +121,17 @@ public:
// be called in a globally synchronized area.
void drop_all();
uint num_buffers() const;
size_t mem_size() const {
return sizeof(*this) +
num_buffers() * sizeof(G1CardSetBuffer) + (size_t)_num_available_nodes * elem_size();
_segmented_array.num_buffers() * sizeof(G1CardSetBuffer) + _segmented_array.num_available_nodes() * _segmented_array.elem_size();
}
size_t wasted_mem_size() const {
return ((size_t)_num_available_nodes - (_num_allocated_nodes - _num_pending_nodes)) * elem_size();
return (_segmented_array.num_available_nodes() - (_segmented_array.num_allocated_nodes() - _num_pending_nodes)) * _segmented_array.elem_size();
}
inline uint num_buffers() { return _segmented_array.num_buffers(); }
void print(outputStream* os);
};

View File

@ -27,6 +27,7 @@
#include "gc/g1/g1CardSetMemory.hpp"
#include "gc/g1/g1CardSetContainers.hpp"
#include "gc/g1/g1SegmentedArray.inline.hpp"
#include "utilities/ostream.hpp"
#include "gc/g1/g1CardSetContainers.inline.hpp"
@ -37,42 +38,9 @@ G1CardSetContainer* volatile* G1CardSetAllocator<Elem>::next_ptr(G1CardSetContai
return node.next_addr();
}
template <class Elem>
G1CardSetBuffer* G1CardSetAllocator<Elem>::create_new_buffer(G1CardSetBuffer* const prev) {
// Take an existing buffer if available.
G1CardSetBuffer* next = _free_buffer_list->get();
if (next == nullptr) {
uint prev_num_elems = (prev != nullptr) ? prev->num_elems() : 0;
uint num_elems = _alloc_options.next_num_elems(prev_num_elems);
next = new G1CardSetBuffer(elem_size(), num_elems, prev);
} else {
assert(elem_size() == next->elem_size() , "Mismatch %d != %d Elem %zu", elem_size(), next->elem_size(), sizeof(Elem));
next->reset(prev);
}
// Install it as current allocation buffer.
G1CardSetBuffer* old = Atomic::cmpxchg(&_first, prev, next);
if (old != prev) {
// Somebody else installed the buffer, use that one.
delete next;
return old;
} else {
// Did we install the first element in the list? If so, this is also the last.
if (prev == nullptr) {
_last = next;
}
// Successfully installed the buffer into the list.
Atomic::inc(&_num_buffers, memory_order_relaxed);
Atomic::add(&_mem_size, next->mem_size(), memory_order_relaxed);
Atomic::add(&_num_available_nodes, next->num_elems(), memory_order_relaxed);
return next;
}
}
template <class Elem>
Elem* G1CardSetAllocator<Elem>::allocate() {
assert(elem_size() > 0, "instance size not set.");
assert(_segmented_array.elem_size() > 0, "instance size not set.");
if (num_free_elems() > 0) {
// Pop under critical section to deal with ABA problem
@ -88,22 +56,9 @@ Elem* G1CardSetAllocator<Elem>::allocate() {
}
}
G1CardSetBuffer* cur = Atomic::load_acquire(&_first);
if (cur == nullptr) {
cur = create_new_buffer(cur);
}
while (true) {
Elem* elem = (Elem*)cur->get_new_buffer_elem();
if (elem != nullptr) {
Atomic::inc(&_num_allocated_nodes, memory_order_relaxed);
guarantee(is_aligned(elem, 8), "result " PTR_FORMAT " not aligned", p2i(elem));
return elem;
}
// The buffer is full. Next round.
assert(cur->is_full(), "must be");
cur = create_new_buffer(cur);
}
Elem* elem = _segmented_array.allocate();
assert(elem != nullptr, "must be");
return elem;
}
inline uint8_t* G1CardSetMemoryManager::allocate(uint type) {
@ -119,11 +74,6 @@ inline void G1CardSetMemoryManager::free_node(void* value) {
free(0, value);
}
template <class Elem>
inline uint G1CardSetAllocator<Elem>::num_buffers() const {
return Atomic::load(&_num_buffers);
}
template <class Elem>
inline uint G1CardSetAllocator<Elem>::num_free_elems() const {
return Atomic::load(&_num_free_nodes);

View File

@@ -0,0 +1,217 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Huawei Technologies Co. Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1SEGMENTEDARRAY_HPP
#define SHARE_GC_G1_G1SEGMENTEDARRAY_HPP
#include "memory/allocation.hpp"
#include "utilities/lockFreeStack.hpp"
// A single buffer/arena containing _num_elems blocks of memory of _elem_size.
// G1SegmentedArrayBuffers can be linked together using a singly linked list.
// Element allocation within a buffer is lock-free via an atomically bumped
// index (_next_allocate); linking/reset are the caller's responsibility.
template<MEMFLAGS flag>
class G1SegmentedArrayBuffer : public CHeapObj<flag> {
const uint _elem_size;
const uint _num_elems;
G1SegmentedArrayBuffer* volatile _next;
char* _buffer; // Actual data.
// Index into the next free block to allocate into. Full if equal (or larger)
// to _num_elems (can be larger because we atomically increment this value and
// check only afterwards if the allocation has been successful).
uint volatile _next_allocate;
public:
G1SegmentedArrayBuffer(uint elem_size, uint num_elems, G1SegmentedArrayBuffer* next);
~G1SegmentedArrayBuffer();
// Address of the link field, for use by intrusive lock-free lists.
G1SegmentedArrayBuffer* volatile* next_addr() { return &_next; }
// Returns a pointer to a fresh element within this buffer, or nullptr if the
// buffer is (or just became) full. Safe to call concurrently.
void* get_new_buffer_elem();
uint num_elems() const { return _num_elems; }
G1SegmentedArrayBuffer* next() const { return _next; }
void set_next(G1SegmentedArrayBuffer* next) {
assert(next != this, " loop condition");
_next = next;
}
// Prepare this buffer for reuse: restart allocation at element 0, relink it
// after the given buffer and zero the backing memory.
void reset(G1SegmentedArrayBuffer* next) {
_next_allocate = 0;
assert(next != this, " loop condition");
set_next(next);
memset((void*)_buffer, 0, (size_t)_num_elems * _elem_size);
}
uint elem_size() const { return _elem_size; }
// Memory footprint of this buffer including the object header itself.
size_t mem_size() const { return sizeof(*this) + (size_t)_num_elems * _elem_size; }
bool is_full() const { return _next_allocate >= _num_elems; }
};
// Set of (free) G1SegmentedArrayBuffers. The assumed usage is that allocation
// to it and removal of elements is strictly separate, but every action may be
// performed by multiple threads at the same time.
// Counts and memory usage are current on a best-effort basis if accessed concurrently.
template<MEMFLAGS flag>
class G1SegmentedArrayBufferList {
// Accessor for the intrusive link field, required by LockFreeStack.
static G1SegmentedArrayBuffer<flag>* volatile* next_ptr(G1SegmentedArrayBuffer<flag>& node) {
return node.next_addr();
}
typedef LockFreeStack<G1SegmentedArrayBuffer<flag>, &G1SegmentedArrayBufferList::next_ptr> NodeStack;
NodeStack _list;
// Statistics; maintained with relaxed atomics, so only approximately in sync
// with _list under concurrent use.
volatile size_t _num_buffers;
volatile size_t _mem_size;
public:
G1SegmentedArrayBufferList() : _list(), _num_buffers(0), _mem_size(0) { }
// Returns all remaining memory to the OS on destruction.
~G1SegmentedArrayBufferList() { free_all(); }
// Prepend a pre-linked chain [first, last] of buffers in one operation.
void bulk_add(G1SegmentedArrayBuffer<flag>& first, G1SegmentedArrayBuffer<flag>& last, size_t num, size_t mem_size);
void add(G1SegmentedArrayBuffer<flag>& elem) { _list.prepend(elem); }
// Pop one buffer, or nullptr if the list is empty.
G1SegmentedArrayBuffer<flag>* get();
// Detach the entire list, reporting count and memory size via out parameters.
G1SegmentedArrayBuffer<flag>* get_all(size_t& num_buffers, size_t& mem_size);
// Give back all memory to the OS.
void free_all();
void print_on(outputStream* out, const char* prefix = "");
size_t num_buffers() const { return Atomic::load(&_num_buffers); }
size_t mem_size() const { return Atomic::load(&_mem_size); }
};
// Configuration for G1SegmentedArray, e.g element size, element number of next G1SegmentedArrayBuffer.
// Subclasses may override next_num_elems() to implement a growth policy
// (see G1CardSetAllocOptions for an exponential-expansion example).
// NOTE(review): polymorphic type without a virtual destructor -- safe only as
// long as instances are never deleted through a base pointer; confirm callers.
class G1SegmentedArrayAllocOptions {
protected:
uint _elem_size;
uint _initial_num_elems;
// Defines a limit to the number of elements in the buffer
uint _max_num_elems;
// Requested element alignment. NOTE(review): stored and exposed via
// alignment() but not enforced here; presumably callers align elem_size
// themselves -- confirm.
uint _alignment;
static const uint BufferAlignment = 4;
static const uint MinimumBufferSize = 8;
static const uint MaximumBufferSize = UINT_MAX / 2;
public:
G1SegmentedArrayAllocOptions(uint elem_size, uint initial_num_elems, uint max_num_elems, uint alignment) :
_elem_size(elem_size),
_initial_num_elems(initial_num_elems),
_max_num_elems(max_num_elems),
_alignment(alignment) {
}
// Number of elements for the buffer following one of prev_num_elems
// elements. Default policy: constant size (always the initial count);
// prev_num_elems is intentionally ignored here.
virtual uint next_num_elems(uint prev_num_elems) const {
return _initial_num_elems;
}
uint elem_size() const { return _elem_size; }
uint alignment() const { return _alignment; }
};
// A segmented array where G1SegmentedArrayBuffer is the segment, and
// G1SegmentedArrayBufferList is the free list to cache G1SegmentedArrayBuffer,
// and G1SegmentedArrayAllocOptions is the configuration for G1SegmentedArray
// attributes.
//
// Implementation details as below:
//
// Arena-like allocator for (card set, or ...) heap memory objects (Elem elements).
//
// Actual allocation from the C heap occurs on G1SegmentedArrayBuffer basis, i.e. segments
// of elements. The assumed allocation pattern for these G1SegmentedArrayBuffer elements
// is assumed to be strictly two-phased:
//
// - in the first phase, G1SegmentedArrayBuffers are allocated from the C heap (or a free
// list given at initialization time). This allocation may occur in parallel. This
// typically corresponds to a single mutator phase, but may extend over multiple.
//
// - in the second phase, G1SegmentedArrayBuffers are given back in bulk to the free list.
// This is typically done during a GC pause.
//
// Some third party is responsible for giving back memory from the free list to
// the operating system.
//
// Allocation and deallocation in the first phase basis may occur by multiple threads at once.
//
// The class also manages a few counters for statistics using atomic operations.
// Their values are only consistent within each other with extra global
// synchronization.
template <class Elem, MEMFLAGS flag>
class G1SegmentedArray {
// G1SegmentedArrayAllocOptions provides parameters for allocation buffer
// sizing and expansion.
const G1SegmentedArrayAllocOptions* _alloc_options;
G1SegmentedArrayBuffer<flag>* volatile _first; // The (start of the) list of all buffers.
G1SegmentedArrayBuffer<flag>* _last; // The last element of the list of all buffers.
volatile uint _num_buffers; // Number of assigned buffers to this allocator.
volatile size_t _mem_size; // Memory used by all buffers.
G1SegmentedArrayBufferList<flag>* _free_buffer_list; // The global free buffer list to
// preferentially get new buffers from.
volatile uint _num_available_nodes; // Number of nodes available in all buffers (allocated + free + pending + not yet used).
volatile uint _num_allocated_nodes; // Number of total nodes allocated and in use.
private:
// Create (or take from the free list) a buffer succeeding prev and try to
// install it as the current allocation buffer.
inline G1SegmentedArrayBuffer<flag>* create_new_buffer(G1SegmentedArrayBuffer<flag>* const prev);
public:
const G1SegmentedArrayBuffer<flag>* first_array_buffer() const { return Atomic::load(&_first); }
uint num_available_nodes() const { return Atomic::load(&_num_available_nodes); }
uint num_allocated_nodes() const { return Atomic::load(&_num_allocated_nodes); }
inline uint elem_size() const;
G1SegmentedArray(const char* name,
const G1SegmentedArrayAllocOptions* buffer_options,
G1SegmentedArrayBufferList<flag>* free_buffer_list);
// Returns all buffers to the free buffer list; see drop_all().
~G1SegmentedArray() {
drop_all();
}
// Deallocate all buffers to the free buffer list and reset this allocator. Must
// be called in a globally synchronized area.
void drop_all();
// Allocate one element, creating a new buffer if the current one is full.
inline Elem* allocate();
inline uint num_buffers() const;
};
#endif //SHARE_GC_G1_G1SEGMENTEDARRAY_HPP

View File

@@ -0,0 +1,236 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Huawei Technologies Co. Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1SEGMENTEDARRAY_INLINE_HPP
#define SHARE_GC_G1_G1SEGMENTEDARRAY_INLINE_HPP
#include "gc/g1/g1SegmentedArray.hpp"
#include "runtime/atomic.hpp"
#include "utilities/globalCounter.inline.hpp"
// Allocate the backing storage for num_instances elements of elem_size bytes.
// The element area is NOT zeroed here; reset() zeroes on reuse.
template<MEMFLAGS flag>
G1SegmentedArrayBuffer<flag>::G1SegmentedArrayBuffer(uint elem_size, uint num_instances, G1SegmentedArrayBuffer* next) :
  _elem_size(elem_size), _num_elems(num_instances), _next(next), _next_allocate(0) {
  // Account the allocation to the template's memory flag rather than the
  // hard-coded mtGCCardSet: this class is parameterized on MEMFLAGS exactly so
  // that each instantiation is attributed correctly in NMT.
  _buffer = NEW_C_HEAP_ARRAY(char, (size_t)_num_elems * elem_size, flag);
}
// Free the backing storage. The first macro argument is the element *type*
// (char, matching the NEW_C_HEAP_ARRAY call), not a memory flag; the original
// passed the MEMFLAGS value mtGCCardSet, which only compiled because the macro
// ignores its type argument.
template<MEMFLAGS flag>
G1SegmentedArrayBuffer<flag>::~G1SegmentedArrayBuffer() {
  FREE_C_HEAP_ARRAY(char, _buffer);
}
// Hand out a pointer to the next free element in this buffer, or nullptr if
// the buffer is full. Lock-free and safe for concurrent callers.
template<MEMFLAGS flag>
void* G1SegmentedArrayBuffer<flag>::get_new_buffer_elem() {
  // Cheap pre-check so a full buffer does not have its counter bumped ever
  // further past _num_elems by repeated attempts.
  if (_next_allocate >= _num_elems) {
    return nullptr;
  }
  // Claim a slot; the counter may overshoot _num_elems, which the check below
  // detects (losing claimants simply return nullptr).
  uint result = Atomic::fetch_and_add(&_next_allocate, 1u, memory_order_relaxed);
  if (result >= _num_elems) {
    return nullptr;
  }
  // Compute the offset in size_t: the allocation size already uses
  // (size_t)_num_elems * _elem_size, so a 32-bit product could overflow for
  // buffers larger than 4G.
  void* r = _buffer + (size_t)result * _elem_size;
  return r;
}
// Prepend a pre-linked chain of buffers [first, last] onto this free list in a
// single operation and bump the statistics by the caller-supplied totals.
// Counters use relaxed atomics, so they are only best-effort consistent with
// the list contents under concurrent access.
template<MEMFLAGS flag>
void G1SegmentedArrayBufferList<flag>::bulk_add(G1SegmentedArrayBuffer<flag>& first,
G1SegmentedArrayBuffer<flag>& last,
size_t num,
size_t mem_size) {
_list.prepend(first, last);
Atomic::add(&_num_buffers, num, memory_order_relaxed);
Atomic::add(&_mem_size, mem_size, memory_order_relaxed);
}
// Print the current buffer count and total memory size, prefixed by "prefix".
template<MEMFLAGS flag>
void G1SegmentedArrayBufferList<flag>::print_on(outputStream* out, const char* prefix) {
  // Sample both statistics once up front; they are modified concurrently.
  size_t buffer_count = Atomic::load(&_num_buffers);
  size_t total_size = Atomic::load(&_mem_size);
  out->print_cr("%s: buffers %zu size %zu", prefix, buffer_count, total_size);
}
// Pop a single buffer off the free list, or return nullptr if it is empty.
template<MEMFLAGS flag>
G1SegmentedArrayBuffer<flag>* G1SegmentedArrayBufferList<flag>::get() {
  // The critical section keeps a popped node from being reclaimed while
  // concurrent readers of the lock-free list may still be traversing it.
  GlobalCounter::CriticalSection cs(Thread::current());
  G1SegmentedArrayBuffer<flag>* result = _list.pop();
  if (result != nullptr) {
    Atomic::dec(&_num_buffers, memory_order_relaxed);
    Atomic::sub(&_mem_size, result->mem_size(), memory_order_relaxed);
  }
  return result;
}
// Detach the entire free list, returning its head (or nullptr if empty) and
// reporting the number of buffers and their total memory size through the
// out parameters.
template<MEMFLAGS flag>
G1SegmentedArrayBuffer<flag>* G1SegmentedArrayBufferList<flag>::get_all(size_t& num_buffers,
                                                                        size_t& mem_size) {
  // Protect concurrent traversers of the lock-free list from reclamation of
  // the detached nodes.
  GlobalCounter::CriticalSection cs(Thread::current());
  G1SegmentedArrayBuffer<flag>* result = _list.pop_all();
  // NOTE(review): the counters are sampled after pop_all(); a bulk_add()
  // racing in between could make them not match the detached chain exactly —
  // presumably callers tolerate approximate statistics; confirm.
  num_buffers = Atomic::load(&_num_buffers);
  mem_size = Atomic::load(&_mem_size);
  if (result != nullptr) {
    Atomic::sub(&_num_buffers, num_buffers, memory_order_relaxed);
    Atomic::sub(&_mem_size, mem_size, memory_order_relaxed);
  }
  return result;
}
// Delete every buffer on the free list and subtract the released buffers and
// bytes from the statistics.
template<MEMFLAGS flag>
void G1SegmentedArrayBufferList<flag>::free_all() {
  size_t freed_count = 0;
  size_t freed_mem = 0;
  // Drain the list one buffer at a time, tallying what is released.
  for (G1SegmentedArrayBuffer<flag>* buf = _list.pop(); buf != nullptr; buf = _list.pop()) {
    freed_mem += buf->mem_size();
    freed_count++;
    delete buf;
  }
  // Adjust the counters after the drain; concurrent additions are possible.
  Atomic::sub(&_num_buffers, freed_count, memory_order_relaxed);
  Atomic::sub(&_mem_size, freed_mem, memory_order_relaxed);
}
// Allocate (or reuse from the free list) a new buffer and try to install it as
// the head of the allocation list, with "prev" as its successor. If another
// thread wins the race to install a buffer, that thread's buffer is returned
// and ours is discarded.
template <class Elem, MEMFLAGS flag>
G1SegmentedArrayBuffer<flag>* G1SegmentedArray<Elem, flag>::create_new_buffer(G1SegmentedArrayBuffer<flag>* const prev) {
  // Take an existing buffer if available.
  G1SegmentedArrayBuffer<flag>* next = _free_buffer_list->get();
  if (next == nullptr) {
    // Nothing reusable; allocate a fresh buffer whose size is derived from
    // the previous buffer's size via the growth policy in the alloc options.
    uint prev_num_elems = (prev != nullptr) ? prev->num_elems() : 0;
    uint num_elems = _alloc_options->next_num_elems(prev_num_elems);
    next = new G1SegmentedArrayBuffer<flag>(elem_size(), num_elems, prev);
  } else {
    assert(elem_size() == next->elem_size() ,
           "Mismatch %d != %d Elem %zu", elem_size(), next->elem_size(), sizeof(Elem));
    // Reused buffers keep their storage; only the link and allocation cursor
    // need resetting.
    next->reset(prev);
  }

  // Install it as current allocation buffer.
  G1SegmentedArrayBuffer<flag>* old = Atomic::cmpxchg(&_first, prev, next);
  if (old != prev) {
    // Somebody else installed the buffer, use that one.
    delete next;
    return old;
  } else {
    // Did we install the first element in the list? If so, this is also the last.
    if (prev == nullptr) {
      _last = next;
    }
    // Successfully installed the buffer into the list.
    Atomic::inc(&_num_buffers, memory_order_relaxed);
    Atomic::add(&_mem_size, next->mem_size(), memory_order_relaxed);
    Atomic::add(&_num_available_nodes, next->num_elems(), memory_order_relaxed);
    return next;
  }
}
// Size in bytes of a single element, as fixed by the allocation options this
// array was constructed with.
template <class Elem, MEMFLAGS flag>
uint G1SegmentedArray<Elem, flag>::elem_size() const {
  const G1SegmentedArrayAllocOptions* options = _alloc_options;
  return options->elem_size();
}
// Construct an empty segmented array. "buffer_options" controls element size
// and buffer growth; "free_buffer_list" is the (shared) list that retired
// buffers are returned to and reused from, and must not be null.
template <class Elem, MEMFLAGS flag>
G1SegmentedArray<Elem, flag>::G1SegmentedArray(const char* name,
                                               const G1SegmentedArrayAllocOptions* buffer_options,
                                               G1SegmentedArrayBufferList<flag>* free_buffer_list) :
  _alloc_options(buffer_options),
  _first(nullptr),
  _last(nullptr),
  _num_buffers(0),
  _mem_size(0),
  _free_buffer_list(free_buffer_list),
  _num_available_nodes(0),
  _num_allocated_nodes(0) {
  assert(_free_buffer_list != nullptr, "precondition!");
}
// Return all buffers of this array to the free buffer list and reset this
// array to its empty state. Not thread-safe with respect to concurrent
// allocate() calls; presumably called only at a safepoint or by a single
// owner — confirm with callers.
template <class Elem, MEMFLAGS flag>
void G1SegmentedArray<Elem, flag>::drop_all() {
  G1SegmentedArrayBuffer<flag>* cur = Atomic::load_acquire(&_first);

  if (cur != nullptr) {
    assert(_last != nullptr, "If there is at least one element, there must be a last one.");

    G1SegmentedArrayBuffer<flag>* first = cur;
#ifdef ASSERT
    // Check list consistency.
    G1SegmentedArrayBuffer<flag>* last = cur;
    uint num_buffers = 0;
    size_t mem_size = 0;
    while (cur != nullptr) {
      mem_size += cur->mem_size();
      num_buffers++;

      G1SegmentedArrayBuffer<flag>* next = cur->next();
      last = cur;
      cur = next;
    }
#endif
    // These asserts compile away in product builds, where the ASSERT-only
    // locals above do not exist.
    assert(num_buffers == _num_buffers, "Buffer count inconsistent %u %u", num_buffers, _num_buffers);
    assert(mem_size == _mem_size, "Memory size inconsistent");
    assert(last == _last, "Inconsistent last element");

    // Hand the whole chain back to the free list in one operation.
    _free_buffer_list->bulk_add(*first, *_last, _num_buffers, _mem_size);
  }

  _first = nullptr;
  _last = nullptr;
  _num_buffers = 0;
  _mem_size = 0;
  _num_available_nodes = 0;
  _num_allocated_nodes = 0;
}
// Allocate one element from the current buffer, creating/installing new
// buffers as needed. Safe to call from multiple threads concurrently.
template <class Elem, MEMFLAGS flag>
Elem* G1SegmentedArray<Elem, flag>::allocate() {
  assert(elem_size() > 0, "instance size not set.");

  G1SegmentedArrayBuffer<flag>* cur = Atomic::load_acquire(&_first);
  if (cur == nullptr) {
    // No buffer yet; create_new_buffer either installs one or returns the
    // buffer another thread installed concurrently.
    cur = create_new_buffer(cur);
  }

  while (true) {
    Elem* elem = (Elem*)cur->get_new_buffer_elem();
    if (elem != nullptr) {
      Atomic::inc(&_num_allocated_nodes, memory_order_relaxed);
      guarantee(is_aligned(elem, _alloc_options->alignment()),
                "result " PTR_FORMAT " not aligned at %u", p2i(elem), _alloc_options->alignment());
      return elem;
    }
    // The buffer is full. Next round.
    assert(cur->is_full(), "must be");
    cur = create_new_buffer(cur);
  }
}
// Current number of buffers in this array; may change concurrently.
template <class Elem, MEMFLAGS flag>
inline uint G1SegmentedArray<Elem, flag>::num_buffers() const {
  uint count = Atomic::load(&_num_buffers);
  return count;
}
#endif //SHARE_GC_G1_G1SEGMENTEDARRAY_INLINE_HPP