mirror of https://github.com/openjdk/jdk.git
synced 2026-02-18 14:25:33 +00:00

Merge branch 'JDK-8364991' into JDK-8367530

This commit is contained in commit b7c3334948

.gitignore (vendored)
@@ -25,3 +25,4 @@ NashornProfile.txt
**/core.[0-9]*
*.rej
*.orig
test/benchmarks/**/target

@@ -158,6 +158,10 @@ ifeq ($(call isTargetOs, windows), true)
WIN_EXPORT_FILE := $(JVM_OUTPUTDIR)/win-exports.def
endif

ifeq ($(SHIP_DEBUG_SYMBOLS), public)
CFLAGS_STRIPPED_DEBUGINFO := -DHAS_STRIPPED_DEBUGINFO
endif

JVM_LDFLAGS += -def:$(WIN_EXPORT_FILE)
endif

@@ -183,6 +187,7 @@ $(eval $(call SetupJdkLibrary, BUILD_LIBJVM, \
CFLAGS := $(JVM_CFLAGS), \
abstract_vm_version.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
arguments.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
whitebox.cpp_CXXFLAGS := $(CFLAGS_STRIPPED_DEBUGINFO), \
DISABLED_WARNINGS_gcc := $(DISABLED_WARNINGS_gcc), \
DISABLED_WARNINGS_gcc_ad_$(HOTSPOT_TARGET_CPU_ARCH).cpp := nonnull, \
DISABLED_WARNINGS_gcc_bytecodeInterpreter.cpp := unused-label, \
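Note: the SHIP_DEBUG_SYMBOLS block above only defines HAS_STRIPPED_DEBUGINFO for whitebox.cpp; this diff does not show how the macro is consumed. A minimal, purely illustrative C++ sketch of the usual shape of such a consumer (the function name is hypothetical and not part of the change):

    // Hypothetical consumer of -DHAS_STRIPPED_DEBUGINFO: the build passes the
    // define only when SHIP_DEBUG_SYMBOLS=public, so code compiled in that
    // configuration can report that the shipped binary carries stripped symbols.
    static bool vm_built_with_stripped_debuginfo() {
    #ifdef HAS_STRIPPED_DEBUGINFO
      return true;
    #else
      return false;
    #endif
    }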
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it

@@ -61,7 +61,7 @@ public class CompilerThreadPool {
} catch (InterruptedException ie) {
// (Re-)Cancel if current thread also interrupted
pool.shutdownNow();
// Preserve interrupt status
// Preserve interrupted status
Thread.currentThread().interrupt();
}
}
@@ -1266,39 +1266,39 @@ source %{
// adlc register classes to make AArch64 rheapbase (r27) and rfp (r29)
// registers conditionally reserved.

_ANY_REG32_mask = _ALL_REG32_mask;
_ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
_ANY_REG32_mask.assignFrom(_ALL_REG32_mask);
_ANY_REG32_mask.remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));

_ANY_REG_mask = _ALL_REG_mask;
_ANY_REG_mask.assignFrom(_ALL_REG_mask);

_PTR_REG_mask = _ALL_REG_mask;
_PTR_REG_mask.assignFrom(_ALL_REG_mask);

_NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
_NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
_NO_SPECIAL_REG32_mask.assignFrom(_ALL_REG32_mask);
_NO_SPECIAL_REG32_mask.subtract(_NON_ALLOCATABLE_REG32_mask);

_NO_SPECIAL_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
_NO_SPECIAL_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

_NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
_NO_SPECIAL_PTR_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_PTR_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

// r27 is not allocatable when compressed oops is on and heapbase is not
// zero, compressed klass pointers doesn't use r27 after JDK-8234794
if (UseCompressedOops && (CompressedOops::base() != nullptr)) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
}

// r29 is not allocatable when PreserveFramePointer is on
if (PreserveFramePointer) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
}

_NO_SPECIAL_NO_RFP_PTR_REG_mask = _NO_SPECIAL_PTR_REG_mask;
_NO_SPECIAL_NO_RFP_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
_NO_SPECIAL_NO_RFP_PTR_REG_mask.assignFrom(_NO_SPECIAL_PTR_REG_mask);
_NO_SPECIAL_NO_RFP_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
}

// Optimizaton of volatile gets and puts

@@ -1734,7 +1734,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
}

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;

int ConstantTable::calculate_table_base_offset() const {
return 0; // absolute addressing, no offset

@@ -2520,10 +2520,10 @@ uint Matcher::int_pressure_limit()
// as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip
// derived pointers and lastly fail to spill after reaching maximum
// number of iterations. Lowering the default pressure threshold to
// (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become
// (_NO_SPECIAL_REG32_mask.size() minus 1) forces CallNode to become
// a high register pressure area of the code so that split_DEF can
// generate DefinitionSpillCopy for the derived pointer.
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1;
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.size() - 1;
if (!PreserveFramePointer) {
// When PreserveFramePointer is off, frame pointer is allocatable,
// but different from other SOC registers, it is excluded from

@@ -2538,34 +2538,34 @@ uint Matcher::int_pressure_limit()
uint Matcher::float_pressure_limit()
{
// _FLOAT_REG_mask is generated by adlc from the float_reg register class.
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE;
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.size() : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
return false;
}

RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
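Note: the projection-mask accessors above now return a const RegMask& and reuse RegMask::EMPTY instead of constructing an empty RegMask by value. A small illustrative sketch of that idiom (class and function names invented for the example; only the EMPTY naming comes from the diff):

    // Return a reference to one shared immutable empty mask instead of
    // building a temporary: no per-call copy, and callers can bind a
    // const reference without caring how large the mask object is.
    struct Mask {
      static const Mask EMPTY;   // single shared instance
    };
    const Mask Mask::EMPTY{};

    const Mask& unused_proj_mask() {
      // This platform never produces the projection, but the signature
      // still has to hand back something valid to reference.
      return Mask::EMPTY;
    }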
@@ -7081,29 +7081,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
%}

instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

instruct vcompressS(vReg dst, vReg src, pReg pg,
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

@@ -5069,29 +5069,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
%}

instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

instruct vcompressS(vReg dst, vReg src, pReg pg,
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

@@ -3486,6 +3486,7 @@ public:
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
INSN(sve_splice,0b00000101, 0b101100100); // splice two vectors under predicate control, destructive
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
INSN(sve_umax, 0b00000100, 0b001001000); // unsigned maximum vectors

@@ -53,7 +53,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(uint64_t,MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);
@@ -2203,114 +2203,117 @@ void C2_MacroAssembler::sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t l
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
// Any remaining elements of dst will be filled with zero.
// Clobbers: rscratch1
// Preserves: src, mask
// Preserves: mask, vzr
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp) {
FloatRegister vzr, FloatRegister vtmp,
PRegister pgtmp, unsigned vector_length_in_bytes) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2);
// When called by sve_compress_byte, src and vtmp may be the same register.
assert_different_registers(dst, src, vzr);
assert_different_registers(dst, vtmp, vzr);
assert_different_registers(mask, pgtmp);

// Example input: src = 8888 7777 6666 5555 4444 3333 2222 1111
// mask = 0001 0000 0000 0001 0001 0000 0001 0001
// Expected result: dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_dup(vtmp2, H, 0);
// high <-- low
// Example input: src = hh gg ff ee dd cc bb aa, one character is 8 bits.
// mask = 01 00 00 01 01 00 01 01, one character is 1 bit.
// Expected result: dst = 00 00 00 hh ee dd bb aa

// Extend lowest half to type INT.
// dst = 00004444 00003333 00002222 00001111
// dst = 00dd 00cc 00bb 00aa
sve_uunpklo(dst, S, src);
// pgtmp = 00000001 00000000 00000001 00000001
// pgtmp = 0001 0000 0001 0001
sve_punpklo(pgtmp, mask);
// Pack the active elements in size of type INT to the right,
// and fill the remainings with zero.
// dst = 00000000 00004444 00002222 00001111
// dst = 0000 00dd 00bb 00aa
sve_compact(dst, S, dst, pgtmp);
// Narrow the result back to type SHORT.
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
sve_uzp1(dst, H, dst, vtmp2);
// dst = 00 00 00 00 00 dd bb aa
sve_uzp1(dst, H, dst, vzr);

// Return if the vector length is no more than MaxVectorSize/2, since the
// highest half is invalid.
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
return;
}

// Count the active elements of lowest half.
// rscratch1 = 3
sve_cntp(rscratch1, S, ptrue, pgtmp);

// Repeat to the highest half.
// pgtmp = 00000001 00000000 00000000 00000001
// pgtmp = 0001 0000 0000 0001
sve_punpkhi(pgtmp, mask);
// vtmp1 = 00008888 00007777 00006666 00005555
sve_uunpkhi(vtmp1, S, src);
// vtmp1 = 00000000 00000000 00008888 00005555
sve_compact(vtmp1, S, vtmp1, pgtmp);
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
sve_uzp1(vtmp1, H, vtmp1, vtmp2);
// vtmp = 00hh 00gg 00ff 00ee
sve_uunpkhi(vtmp, S, src);
// vtmp = 0000 0000 00hh 00ee
sve_compact(vtmp, S, vtmp, pgtmp);
// vtmp = 00 00 00 00 00 00 hh ee
sve_uzp1(vtmp, H, vtmp, vzr);

// Compressed low: dst = 0000 0000 0000 0000 0000 4444 2222 1111
// Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
// TRUE_CNT is the number of active elements in the compressed low.
neg(rscratch1, rscratch1);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, H, rscratch1, 1);
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
sve_tbl(vtmp1, H, vtmp1, vtmp2);

// Combine the compressed high(after shifted) with the compressed low.
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_orr(dst, dst, vtmp1);
// pgtmp = 00 00 00 00 00 01 01 01
sve_whilelt(pgtmp, H, zr, rscratch1);
// Compressed low: dst = 00 00 00 00 00 dd bb aa
// Compressed high: vtmp = 00 00 00 00 00 00 hh ee
// Combine the compressed low with the compressed high:
// dst = 00 00 00 hh ee dd bb aa
sve_splice(dst, H, pgtmp, vtmp);
}

// Clobbers: rscratch1, rscratch2
// Preserves: src, mask
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp) {
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3);
assert_different_registers(mask, ptmp, pgtmp);
// Example input: src = 88 77 66 55 44 33 22 11
// mask = 01 00 00 01 01 00 01 01
// Expected result: dst = 00 00 00 88 55 44 22 11
// high <-- low
// Example input: src = q p n m l k j i h g f e d c b a, one character is 8 bits.
// mask = 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1, one character is 1 bit.
// Expected result: dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
FloatRegister vzr = vtmp3;
sve_dup(vzr, B, 0);

sve_dup(vtmp4, B, 0);
// Extend lowest half to type SHORT.
// vtmp1 = 0044 0033 0022 0011
// vtmp1 = 0h 0g 0f 0e 0d 0c 0b 0a
sve_uunpklo(vtmp1, H, src);
// ptmp = 0001 0000 0001 0001
// ptmp = 00 01 00 00 00 01 00 01
sve_punpklo(ptmp, mask);
// Pack the active elements in size of type SHORT to the right,
// and fill the remainings with zero.
// dst = 00 00 00 00 00 0g 0c 0a
unsigned extended_size = vector_length_in_bytes << 1;
sve_compress_short(dst, vtmp1, ptmp, vzr, vtmp2, pgtmp, extended_size > MaxVectorSize ? MaxVectorSize : extended_size);
// Narrow the result back to type BYTE.
// dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
sve_uzp1(dst, B, dst, vzr);

// Return if the vector length is no more than MaxVectorSize/2, since the
// highest half is invalid.
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
return;
}
// Count the active elements of lowest half.
// rscratch2 = 3
sve_cntp(rscratch2, H, ptrue, ptmp);
// Pack the active elements in size of type SHORT to the right,
// and fill the remainings with zero.
// dst = 0000 0044 0022 0011
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
// Narrow the result back to type BYTE.
// dst = 00 00 00 00 00 44 22 11
sve_uzp1(dst, B, dst, vtmp4);

// Repeat to the highest half.
// ptmp = 0001 0000 0000 0001
// ptmp = 00 01 00 00 00 00 00 01
sve_punpkhi(ptmp, mask);
// vtmp1 = 0088 0077 0066 0055
// vtmp2 = 0q 0p 0n 0m 0l 0k 0j 0i
sve_uunpkhi(vtmp2, H, src);
// vtmp1 = 0000 0000 0088 0055
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
// vtmp1 = 00 00 00 00 00 00 0p 0i
sve_compress_short(vtmp1, vtmp2, ptmp, vzr, vtmp2, pgtmp, extended_size - MaxVectorSize);
// vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
sve_uzp1(vtmp1, B, vtmp1, vzr);

sve_dup(vtmp4, B, 0);
// vtmp1 = 00 00 00 00 00 00 88 55
sve_uzp1(vtmp1, B, vtmp1, vtmp4);

// Compressed low: dst = 00 00 00 00 00 44 22 11
// Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
// TRUE_CNT is the number of active elements in the compressed low.
neg(rscratch2, rscratch2);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, B, rscratch2, 1);
// vtmp1 = 00 00 00 88 55 00 00 00
sve_tbl(vtmp1, B, vtmp1, vtmp2);
// Combine the compressed high(after shifted) with the compressed low.
// dst = 00 00 00 88 55 44 22 11
sve_orr(dst, dst, vtmp1);
// ptmp = 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1
sve_whilelt(ptmp, B, zr, rscratch2);
// Compressed low: dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
// Compressed high: vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
// Combine the compressed low with the compressed high:
// dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
sve_splice(dst, B, ptmp, vtmp1);
}

void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
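Note: the comments above describe a predicate-controlled compress. The SVE sequences (uunpklo/uunpkhi, compact, uzp1, then splice or tbl/orr for the upper half) compute the same end result as this plain scalar reference model (illustrative only, not HotSpot code):

    #include <cstddef>

    // Scalar model of "compress under mask": active elements are packed
    // toward lane 0 in their original order and the remaining lanes are
    // zero-filled, matching the documented contract of
    // sve_compress_short / sve_compress_byte.
    template <typename Elem>
    void compress_under_mask(Elem* dst, const Elem* src,
                             const bool* active, size_t lanes) {
      size_t out = 0;
      for (size_t i = 0; i < lanes; i++) {
        if (active[i]) {
          dst[out++] = src[i];
        }
      }
      for (; out < lanes; out++) {
        dst[out] = 0;
      }
    }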
@@ -173,13 +173,12 @@
// lowest-numbered elements of dst. Any remaining elements of dst will
// be filled with zero.
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp);
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes);

void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp);
FloatRegister vzr, FloatRegister vtmp,
PRegister pgtmp, unsigned vector_length_in_bytes);

void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

@@ -55,7 +55,6 @@ define_pd_global(size_t, InitialCodeCacheSize, 2496*K); // Integral multip
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t,MaxRAM, 128ULL*G);
define_pd_global(intx, RegisterCostAreaRatio, 16000);

// Peephole and CISC spilling both break the graph, and so makes the
@@ -148,56 +148,34 @@ extern "C" void disnm(intptr_t p);
// strictly should be 64 bit movz #imm16<<0
// 110___10100 (i.e. requires insn[31:21] == 11010010100)
//
class RelocActions {
protected:
typedef int (*reloc_insn)(address insn_addr, address &target);

virtual reloc_insn adrpMem() = 0;
virtual reloc_insn adrpAdd() = 0;
virtual reloc_insn adrpMovk() = 0;
static uint32_t insn_at(address insn_addr, int n) {
return ((uint32_t*)insn_addr)[n];
}

const address _insn_addr;
const uint32_t _insn;

static uint32_t insn_at(address insn_addr, int n) {
return ((uint32_t*)insn_addr)[n];
}
uint32_t insn_at(int n) const {
return insn_at(_insn_addr, n);
}
template<typename T>
class RelocActions : public AllStatic {

public:

RelocActions(address insn_addr) : _insn_addr(insn_addr), _insn(insn_at(insn_addr, 0)) {}
RelocActions(address insn_addr, uint32_t insn)
: _insn_addr(insn_addr), _insn(insn) {}

virtual int unconditionalBranch(address insn_addr, address &target) = 0;
virtual int conditionalBranch(address insn_addr, address &target) = 0;
virtual int testAndBranch(address insn_addr, address &target) = 0;
virtual int loadStore(address insn_addr, address &target) = 0;
virtual int adr(address insn_addr, address &target) = 0;
virtual int adrp(address insn_addr, address &target, reloc_insn inner) = 0;
virtual int immediate(address insn_addr, address &target) = 0;
virtual void verify(address insn_addr, address &target) = 0;

int ALWAYSINLINE run(address insn_addr, address &target) {
static int ALWAYSINLINE run(address insn_addr, address &target) {
int instructions = 1;
uint32_t insn = insn_at(insn_addr, 0);

uint32_t dispatch = Instruction_aarch64::extract(_insn, 30, 25);
uint32_t dispatch = Instruction_aarch64::extract(insn, 30, 25);
switch(dispatch) {
case 0b001010:
case 0b001011: {
instructions = unconditionalBranch(insn_addr, target);
instructions = T::unconditionalBranch(insn_addr, target);
break;
}
case 0b101010: // Conditional branch (immediate)
case 0b011010: { // Compare & branch (immediate)
instructions = conditionalBranch(insn_addr, target);
break;
instructions = T::conditionalBranch(insn_addr, target);
break;
}
case 0b011011: {
instructions = testAndBranch(insn_addr, target);
instructions = T::testAndBranch(insn_addr, target);
break;
}
case 0b001100:

@@ -209,9 +187,9 @@ public:
case 0b111100:
case 0b111110: {
// load/store
if ((Instruction_aarch64::extract(_insn, 29, 24) & 0b111011) == 0b011000) {
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
// Load register (literal)
instructions = loadStore(insn_addr, target);
instructions = T::loadStore(insn_addr, target);
break;
} else {
// nothing to do

@@ -224,27 +202,27 @@ public:
case 0b101000:
case 0b111000: {
// adr/adrp
assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
int shift = Instruction_aarch64::extract(_insn, 31, 31);
assert(Instruction_aarch64::extract(insn, 28, 24) == 0b10000, "must be");
int shift = Instruction_aarch64::extract(insn, 31, 31);
if (shift) {
uint32_t insn2 = insn_at(1);
uint32_t insn2 = insn_at(insn_addr, 1);
if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
Instruction_aarch64::extract(_insn, 4, 0) ==
Instruction_aarch64::extract(insn, 4, 0) ==
Instruction_aarch64::extract(insn2, 9, 5)) {
instructions = adrp(insn_addr, target, adrpMem());
instructions = T::adrp(insn_addr, target, T::adrpMem);
} else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
Instruction_aarch64::extract(_insn, 4, 0) ==
Instruction_aarch64::extract(insn, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) {
instructions = adrp(insn_addr, target, adrpAdd());
instructions = T::adrp(insn_addr, target, T::adrpAdd);
} else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
Instruction_aarch64::extract(_insn, 4, 0) ==
Instruction_aarch64::extract(insn, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) {
instructions = adrp(insn_addr, target, adrpMovk());
instructions = T::adrp(insn_addr, target, T::adrpMovk);
} else {
ShouldNotReachHere();
}
} else {
instructions = adr(insn_addr, target);
instructions = T::adr(insn_addr, target);
}
break;
}

@@ -252,7 +230,7 @@ public:
case 0b011001:
case 0b101001:
case 0b111001: {
instructions = immediate(insn_addr, target);
instructions = T::immediate(insn_addr, target);
break;
}
default: {
@@ -260,42 +238,36 @@ public:
}
}

verify(insn_addr, target);
T::verify(insn_addr, target);
return instructions * NativeInstruction::instruction_size;
}
};

class Patcher : public RelocActions {
virtual reloc_insn adrpMem() { return &Patcher::adrpMem_impl; }
virtual reloc_insn adrpAdd() { return &Patcher::adrpAdd_impl; }
virtual reloc_insn adrpMovk() { return &Patcher::adrpMovk_impl; }

class Patcher : public AllStatic {
public:
Patcher(address insn_addr) : RelocActions(insn_addr) {}

virtual int unconditionalBranch(address insn_addr, address &target) {
static int unconditionalBranch(address insn_addr, address &target) {
intptr_t offset = (target - insn_addr) >> 2;
Instruction_aarch64::spatch(insn_addr, 25, 0, offset);
return 1;
}
virtual int conditionalBranch(address insn_addr, address &target) {
static int conditionalBranch(address insn_addr, address &target) {
intptr_t offset = (target - insn_addr) >> 2;
Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
return 1;
}
virtual int testAndBranch(address insn_addr, address &target) {
static int testAndBranch(address insn_addr, address &target) {
intptr_t offset = (target - insn_addr) >> 2;
Instruction_aarch64::spatch(insn_addr, 18, 5, offset);
return 1;
}
virtual int loadStore(address insn_addr, address &target) {
static int loadStore(address insn_addr, address &target) {
intptr_t offset = (target - insn_addr) >> 2;
Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
return 1;
}
virtual int adr(address insn_addr, address &target) {
static int adr(address insn_addr, address &target) {
#ifdef ASSERT
assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
assert(Instruction_aarch64::extract(insn_at(insn_addr, 0), 28, 24) == 0b10000, "must be");
#endif
// PC-rel. addressing
ptrdiff_t offset = target - insn_addr;

@@ -305,17 +277,18 @@ public:
Instruction_aarch64::patch(insn_addr, 30, 29, offset_lo);
return 1;
}
virtual int adrp(address insn_addr, address &target, reloc_insn inner) {
template<typename U>
static int adrp(address insn_addr, address &target, U inner) {
int instructions = 1;
#ifdef ASSERT
assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
assert(Instruction_aarch64::extract(insn_at(insn_addr, 0), 28, 24) == 0b10000, "must be");
#endif
ptrdiff_t offset = target - insn_addr;
instructions = 2;
precond(inner != nullptr);
// Give the inner reloc a chance to modify the target.
address adjusted_target = target;
instructions = (*inner)(insn_addr, adjusted_target);
instructions = inner(insn_addr, adjusted_target);
uintptr_t pc_page = (uintptr_t)insn_addr >> 12;
uintptr_t adr_page = (uintptr_t)adjusted_target >> 12;
offset = adr_page - pc_page;

@@ -325,7 +298,7 @@ public:
Instruction_aarch64::patch(insn_addr, 30, 29, offset_lo);
return instructions;
}
static int adrpMem_impl(address insn_addr, address &target) {
static int adrpMem(address insn_addr, address &target) {
uintptr_t dest = (uintptr_t)target;
int offset_lo = dest & 0xfff;
uint32_t insn2 = insn_at(insn_addr, 1);
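Note: Patcher::adrp above recomputes the ADRP immediate from the 4 KiB page delta between the instruction and the (possibly inner-adjusted) target, installing the low two bits into instruction bits 30:29 and the high bits into 23:5. A stand-alone sketch of that arithmetic, assuming the split matches the patch calls shown above:

    #include <cstdint>

    struct AdrpImm {
      uint32_t immlo;   // goes into instruction bits 30:29
      uint32_t immhi;   // goes into instruction bits 23:5 (19 bits)
    };

    // Page-relative displacement used by ADRP: compare the 4 KiB page
    // numbers of the instruction and the target, then split the signed
    // page delta into the two immediate fields.
    AdrpImm adrp_imm_for(uintptr_t insn_addr, uintptr_t target) {
      intptr_t page_delta = (intptr_t)(target >> 12) - (intptr_t)(insn_addr >> 12);
      AdrpImm imm;
      imm.immlo = (uint32_t)(page_delta & 0x3);
      imm.immhi = (uint32_t)((page_delta >> 2) & 0x7ffff);
      return imm;
    }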
@@ -334,21 +307,21 @@ public:
guarantee(((dest >> size) << size) == dest, "misaligned target");
return 2;
}
static int adrpAdd_impl(address insn_addr, address &target) {
static int adrpAdd(address insn_addr, address &target) {
uintptr_t dest = (uintptr_t)target;
int offset_lo = dest & 0xfff;
Instruction_aarch64::patch(insn_addr + sizeof (uint32_t), 21, 10, offset_lo);
return 2;
}
static int adrpMovk_impl(address insn_addr, address &target) {
static int adrpMovk(address insn_addr, address &target) {
uintptr_t dest = uintptr_t(target);
Instruction_aarch64::patch(insn_addr + sizeof (uint32_t), 20, 5, (uintptr_t)target >> 32);
dest = (dest & 0xffffffffULL) | (uintptr_t(insn_addr) & 0xffff00000000ULL);
target = address(dest);
return 2;
}
virtual int immediate(address insn_addr, address &target) {
assert(Instruction_aarch64::extract(_insn, 31, 21) == 0b11010010100, "must be");
static int immediate(address insn_addr, address &target) {
assert(Instruction_aarch64::extract(insn_at(insn_addr, 0), 31, 21) == 0b11010010100, "must be");
uint64_t dest = (uint64_t)target;
// Move wide constant
assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");

@@ -358,7 +331,7 @@ public:
Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
return 3;
}
virtual void verify(address insn_addr, address &target) {
static void verify(address insn_addr, address &target) {
#ifdef ASSERT
address address_is = MacroAssembler::target_addr_for_insn(insn_addr);
if (!(address_is == target)) {

@@ -392,56 +365,54 @@ static bool offset_for(uint32_t insn1, uint32_t insn2, ptrdiff_t &byte_offset) {
return false;
}

class AArch64Decoder : public RelocActions {
virtual reloc_insn adrpMem() { return &AArch64Decoder::adrpMem_impl; }
virtual reloc_insn adrpAdd() { return &AArch64Decoder::adrpAdd_impl; }
virtual reloc_insn adrpMovk() { return &AArch64Decoder::adrpMovk_impl; }

class AArch64Decoder : public AllStatic {
public:
AArch64Decoder(address insn_addr, uint32_t insn) : RelocActions(insn_addr, insn) {}

virtual int loadStore(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(_insn, 23, 5);
static int loadStore(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(insn_at(insn_addr, 0), 23, 5);
target = insn_addr + (offset << 2);
return 1;
}
virtual int unconditionalBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(_insn, 25, 0);
static int unconditionalBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(insn_at(insn_addr, 0), 25, 0);
target = insn_addr + (offset << 2);
return 1;
}
virtual int conditionalBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(_insn, 23, 5);
static int conditionalBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(insn_at(insn_addr, 0), 23, 5);
target = address(((uint64_t)insn_addr + (offset << 2)));
return 1;
}
virtual int testAndBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(_insn, 18, 5);
static int testAndBranch(address insn_addr, address &target) {
intptr_t offset = Instruction_aarch64::sextract(insn_at(insn_addr, 0), 18, 5);
target = address(((uint64_t)insn_addr + (offset << 2)));
return 1;
}
virtual int adr(address insn_addr, address &target) {
static int adr(address insn_addr, address &target) {
// PC-rel. addressing
intptr_t offset = Instruction_aarch64::extract(_insn, 30, 29);
offset |= Instruction_aarch64::sextract(_insn, 23, 5) << 2;
uint32_t insn = insn_at(insn_addr, 0);
intptr_t offset = Instruction_aarch64::extract(insn, 30, 29);
offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
target = address((uint64_t)insn_addr + offset);
return 1;
}
virtual int adrp(address insn_addr, address &target, reloc_insn inner) {
assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
intptr_t offset = Instruction_aarch64::extract(_insn, 30, 29);
offset |= Instruction_aarch64::sextract(_insn, 23, 5) << 2;
template<typename U>
static int adrp(address insn_addr, address &target, U inner) {
uint32_t insn = insn_at(insn_addr, 0);
assert(Instruction_aarch64::extract(insn, 28, 24) == 0b10000, "must be");
intptr_t offset = Instruction_aarch64::extract(insn, 30, 29);
offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
int shift = 12;
offset <<= shift;
uint64_t target_page = ((uint64_t)insn_addr) + offset;
target_page &= ((uint64_t)-1) << shift;
uint32_t insn2 = insn_at(1);
uint32_t insn2 = insn_at(insn_addr, 1);
target = address(target_page);
precond(inner != nullptr);
(*inner)(insn_addr, target);
inner(insn_addr, target);
return 2;
}
static int adrpMem_impl(address insn_addr, address &target) {
static int adrpMem(address insn_addr, address &target) {
uint32_t insn2 = insn_at(insn_addr, 1);
// Load/store register (unsigned immediate)
ptrdiff_t byte_offset = Instruction_aarch64::extract(insn2, 21, 10);

@@ -450,14 +421,14 @@ public:
target += byte_offset;
return 2;
}
static int adrpAdd_impl(address insn_addr, address &target) {
static int adrpAdd(address insn_addr, address &target) {
uint32_t insn2 = insn_at(insn_addr, 1);
// add (immediate)
ptrdiff_t byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
target += byte_offset;
return 2;
}
static int adrpMovk_impl(address insn_addr, address &target) {
static int adrpMovk(address insn_addr, address &target) {
uint32_t insn2 = insn_at(insn_addr, 1);
uint64_t dest = uint64_t(target);
dest = (dest & 0xffff0000ffffffff) |

@@ -476,35 +447,33 @@ public:
return 2;
}
}
virtual int immediate(address insn_addr, address &target) {
static int immediate(address insn_addr, address &target) {
uint32_t *insns = (uint32_t *)insn_addr;
assert(Instruction_aarch64::extract(_insn, 31, 21) == 0b11010010100, "must be");
assert(Instruction_aarch64::extract(insns[0], 31, 21) == 0b11010010100, "must be");
// Move wide constant: movz, movk, movk. See movptr().
assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
target = address(uint64_t(Instruction_aarch64::extract(_insn, 20, 5))
+ (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
+ (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
target = address(uint64_t(Instruction_aarch64::extract(insns[0], 20, 5))
+ (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
+ (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
return 3;
}
virtual void verify(address insn_addr, address &target) {
static void verify(address insn_addr, address &target) {
}
};

address MacroAssembler::target_addr_for_insn(address insn_addr, uint32_t insn) {
AArch64Decoder decoder(insn_addr, insn);
address MacroAssembler::target_addr_for_insn(address insn_addr) {
address target;
decoder.run(insn_addr, target);
RelocActions<AArch64Decoder>::run(insn_addr, target);
return target;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address insn_addr, address target) {
Patcher patcher(insn_addr);
return patcher.run(insn_addr, target);
return RelocActions<Patcher>::run(insn_addr, target);
}

int MacroAssembler::patch_oop(address insn_addr, address o) {

@@ -546,11 +515,11 @@ int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
return 2 * NativeInstruction::instruction_size;
}

address MacroAssembler::target_addr_for_insn_or_null(address insn_addr, unsigned insn) {
if (NativeInstruction::is_ldrw_to_zr(address(&insn))) {
address MacroAssembler::target_addr_for_insn_or_null(address insn_addr) {
if (NativeInstruction::is_ldrw_to_zr(insn_addr)) {
return nullptr;
}
return MacroAssembler::target_addr_for_insn(insn_addr, insn);
return MacroAssembler::target_addr_for_insn(insn_addr);
}

void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp) {
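Note: the refactor above replaces the virtual RelocActions hierarchy with RelocActions<T>, where T (Patcher or AArch64Decoder) supplies static handlers, so run() dispatches at compile time and no decoder or patcher object has to be constructed per relocation. A reduced sketch of the pattern, with invented names rather than the real interface:

    // Static ("policy") dispatch standing in for virtual dispatch.
    template <typename Policy>
    class RelocRunner {
     public:
      static int run(unsigned char* insn_addr, unsigned char*& target) {
        // Compile-time call into the policy: no vtable, no instance state.
        return Policy::handle(insn_addr, target);
      }
    };

    struct DecodePolicy {
      static int handle(unsigned char* insn_addr, unsigned char*& target) {
        target = insn_addr;             // placeholder: derive target from the insn
        return 1;                       // instructions consumed
      }
    };

    struct PatchPolicy {
      static int handle(unsigned char* insn_addr, unsigned char*& target) {
        (void)insn_addr; (void)target;  // placeholder: rewrite the insn here
        return 1;
      }
    };

    // Usage mirrors the new call sites:
    //   RelocRunner<DecodePolicy>::run(addr, target);
    //   RelocRunner<PatchPolicy>::run(addr, target);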
@@ -676,16 +676,8 @@ public:
static bool needs_explicit_null_check(intptr_t offset);
static bool uses_implicit_null_check(void* address);

static address target_addr_for_insn(address insn_addr, unsigned insn);
static address target_addr_for_insn_or_null(address insn_addr, unsigned insn);
static address target_addr_for_insn(address insn_addr) {
unsigned insn = *(unsigned*)insn_addr;
return target_addr_for_insn(insn_addr, insn);
}
static address target_addr_for_insn_or_null(address insn_addr) {
unsigned insn = *(unsigned*)insn_addr;
return target_addr_for_insn_or_null(insn_addr, insn);
}
static address target_addr_for_insn(address insn_addr);
static address target_addr_for_insn_or_null(address insn_addr);

// Required platform-specific helpers for Label::patch_instructions.
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
@@ -1131,27 +1131,27 @@ bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

bool maybe_far_call(const CallNode *n) {

@@ -54,7 +54,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true);
define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // COMPILER2
define_pd_global(bool, UseTypeProfile, false);

@@ -80,9 +80,6 @@ define_pd_global(size_t, NonProfiledCodeHeapSize, 21*M);
define_pd_global(size_t, ProfiledCodeHeapSize, 22*M);
define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 128ULL*G);
#else
// InitialCodeCacheSize derived from specjbb2000 run.
define_pd_global(size_t, InitialCodeCacheSize, 1536*K); // Integral multiple of CodeCacheExpansionSize

@@ -91,8 +88,6 @@ define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M);
define_pd_global(size_t, ProfiledCodeHeapSize, 14*M);
define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheExpansionSize, 32*K);
// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 4ULL*G);
#endif
define_pd_global(size_t, CodeCacheMinBlockLength, 6);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
@@ -53,7 +53,6 @@ define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true);
define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
#endif // !COMPILER2

@@ -86,7 +86,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M );
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 128ULL*G);
define_pd_global(size_t, CodeCacheMinBlockLength, 6);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);

@@ -133,8 +133,13 @@ class InterpreterMacroAssembler: public MacroAssembler {
void get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size);

void load_resolved_indy_entry(Register cache, Register index);
void load_field_entry(Register cache, Register index, int bcp_offset = 1);
void load_method_entry(Register cache, Register index, int bcp_offset = 1);
void load_field_or_method_entry(bool is_method, Register cache, Register index, int bcp_offset, bool for_fast_bytecode);
void load_field_entry(Register cache, Register index, int bcp_offset = 1, bool for_fast_bytecode = false) {
load_field_or_method_entry(false, cache, index, bcp_offset, for_fast_bytecode);
}
void load_method_entry(Register cache, Register index, int bcp_offset = 1, bool for_fast_bytecode = false) {
load_field_or_method_entry(true, cache, index, bcp_offset, for_fast_bytecode);
}

void get_u4(Register Rdst, Register Rsrc, int offset, signedOrNot is_signed);
@@ -468,33 +468,33 @@ void InterpreterMacroAssembler::load_resolved_indy_entry(Register cache, Registe
add(cache, cache, index);
}

void InterpreterMacroAssembler::load_field_entry(Register cache, Register index, int bcp_offset) {
void InterpreterMacroAssembler::load_field_or_method_entry(bool is_method, Register cache, Register index, int bcp_offset, bool for_fast_bytecode) {
const int entry_size = is_method ? sizeof(ResolvedMethodEntry) : sizeof(ResolvedFieldEntry),
base_offset = is_method ? Array<ResolvedMethodEntry>::base_offset_in_bytes() : Array<ResolvedFieldEntry>::base_offset_in_bytes(),
entries_offset = is_method ? in_bytes(ConstantPoolCache::method_entries_offset()) : in_bytes(ConstantPoolCache::field_entries_offset());

// Get index out of bytecode pointer
get_cache_index_at_bcp(index, bcp_offset, sizeof(u2));
// Take shortcut if the size is a power of 2
if (is_power_of_2(sizeof(ResolvedFieldEntry))) {
if (is_power_of_2(entry_size)) {
// Scale index by power of 2
sldi(index, index, log2i_exact(sizeof(ResolvedFieldEntry)));
sldi(index, index, log2i_exact(entry_size));
} else {
// Scale the index to be the entry index * sizeof(ResolvedFieldEntry)
mulli(index, index, sizeof(ResolvedFieldEntry));
mulli(index, index, entry_size);
}
// Get address of field entries array
ld_ptr(cache, in_bytes(ConstantPoolCache::field_entries_offset()), R27_constPoolCache);
addi(cache, cache, Array<ResolvedFieldEntry>::base_offset_in_bytes());
ld_ptr(cache, entries_offset, R27_constPoolCache);
addi(cache, cache, base_offset);
add(cache, cache, index);
}

void InterpreterMacroAssembler::load_method_entry(Register cache, Register index, int bcp_offset) {
// Get index out of bytecode pointer
get_cache_index_at_bcp(index, bcp_offset, sizeof(u2));
// Scale the index to be the entry index * sizeof(ResolvedMethodEntry)
mulli(index, index, sizeof(ResolvedMethodEntry));

// Get address of field entries array
ld_ptr(cache, ConstantPoolCache::method_entries_offset(), R27_constPoolCache);
addi(cache, cache, Array<ResolvedMethodEntry>::base_offset_in_bytes());
add(cache, cache, index); // method_entries + base_offset + scaled index
if (for_fast_bytecode) {
// Prevent speculative loading from ResolvedFieldEntry/ResolvedMethodEntry as it can miss the info written by another thread.
// TemplateTable::patch_bytecode uses release-store.
// We reached here via control dependency (Bytecode dispatch has used the rewritten Bytecode).
// So, we can use control-isync based ordering.
isync();
}
}

// Load object from cpool->resolved_references(index).
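Note: the comment in load_field_or_method_entry above pairs the release-store done in TemplateTable::patch_bytecode with a control dependency plus isync() on the reading side. Expressed with C++ atomics instead of PPC instructions, the intended ordering is roughly the following (illustrative sketch, not the actual HotSpot types):

    #include <atomic>

    struct ResolvedEntry { int offset; int flags; };   // stand-in fields

    std::atomic<unsigned char> bytecode_slot;          // the bytecode being patched
    ResolvedEntry entry;                               // ResolvedField/MethodEntry stand-in

    // Writer (patch_bytecode): fill in the resolved entry first, then
    // publish the rewritten fast bytecode with release semantics
    // (release() + stb on PPC).
    void patch(unsigned char fast_bc, ResolvedEntry resolved) {
      entry = resolved;
      bytecode_slot.store(fast_bc, std::memory_order_release);
    }

    // Reader (fast bytecode codelet): it only runs because dispatch already
    // loaded the rewritten bytecode; an acquire on that load models the
    // control-dependency + isync() ordering that makes the entry visible.
    ResolvedEntry fast_path() {
      (void)bytecode_slot.load(std::memory_order_acquire);
      return entry;
    }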
@@ -2450,27 +2450,27 @@ bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
}

// Register for DIVI projection of divmodI.
RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

%}

@@ -12381,27 +12381,27 @@ instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
%}

// Expand nodes for byte_reverse_int.
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
effect(DEF dst, USE src, USE pos, USE shift);
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 n, immI16 b) %{
effect(DEF dst, USE src, USE n, USE b);
predicate(false);

format %{ "INSRWI $dst, $src, $pos, $shift" %}
format %{ "INSRWI $dst, $src, $n, $b" %}
size(4);
ins_encode %{
__ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
__ insrwi($dst$$Register, $src$$Register, $n$$constant, $b$$constant);
%}
ins_pipe(pipe_class_default);
%}

// As insrwi_a, but with USE_DEF.
instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
effect(USE_DEF dst, USE src, USE pos, USE shift);
instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 n, immI16 b) %{
effect(USE_DEF dst, USE src, USE n, USE b);
predicate(false);

format %{ "INSRWI $dst, $src, $pos, $shift" %}
format %{ "INSRWI $dst, $src, $n, $b" %}
size(4);
ins_encode %{
__ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
__ insrwi($dst$$Register, $src$$Register, $n$$constant, $b$$constant);
%}
ins_pipe(pipe_class_default);
%}

@@ -12423,12 +12423,12 @@ instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
iRegLdst tmpI3;

urShiftI_reg_imm(tmpI1, src, imm24);
insrwi_a(dst, tmpI1, imm24, imm8);
insrwi_a(dst, tmpI1, imm8, imm24);
urShiftI_reg_imm(tmpI2, src, imm16);
insrwi(dst, tmpI2, imm8, imm16);
insrwi(dst, tmpI2, imm16, imm8);
urShiftI_reg_imm(tmpI3, src, imm8);
insrwi(dst, tmpI3, imm8, imm8);
insrwi(dst, src, imm0, imm8);
insrwi(dst, src, imm8, imm0);
%}
%}

@@ -12546,7 +12546,7 @@ instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
immI16 imm8 %{ (int) 8 %}

urShiftI_reg_imm(dst, src, imm8);
insrwi(dst, src, imm16, imm8);
insrwi(dst, src, imm8, imm16);
%}
%}

@@ -12575,7 +12575,7 @@ instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
iRegLdst tmpI1;

urShiftI_reg_imm(tmpI1, src, imm8);
insrwi(tmpI1, src, imm16, imm8);
insrwi(tmpI1, src, imm8, imm16);
extsh(dst, tmpI1);
%}
%}
@@ -148,7 +148,9 @@ void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Reg
__ bind(L_fast_patch);
}

// Patch bytecode.
// Patch bytecode with release store to coordinate with ResolvedFieldEntry
// and ResolvedMethodEntry loads in fast bytecode codelets.
__ release();
__ stb(Rnew_bc, 0, R14_bcp);

__ bind(L_patch_done);

@@ -312,6 +314,7 @@ void TemplateTable::fast_aldc(LdcType type) {
// We are resolved if the resolved reference cache entry contains a
// non-null object (CallSite, etc.)
__ get_cache_index_at_bcp(R31, 1, index_size); // Load index.
// Only rewritten during link time. So, no need for memory barriers for accessing resolved info.
__ load_resolved_reference_at_index(R17_tos, R31, R11_scratch1, R12_scratch2, &is_null);

// Convert null sentinel to null

@@ -3114,7 +3117,7 @@ void TemplateTable::fast_storefield(TosState state) {
const ConditionRegister CR_is_vol = CR2; // Non-volatile condition register (survives runtime call in do_oop_store).

// Constant pool already resolved => Load flags and offset of field.
__ load_field_entry(Rcache, Rscratch);
__ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
jvmti_post_field_mod(Rcache, Rscratch, false /* not static */);
load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12

@@ -3195,7 +3198,7 @@ void TemplateTable::fast_accessfield(TosState state) {
// R12_scratch2 used by load_field_cp_cache_entry

// Constant pool already resolved. Get the field offset.
__ load_field_entry(Rcache, Rscratch);
__ load_field_entry(Rcache, Rscratch, 1, /* for_fast_bytecode */ true);
load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12

// JVMTI support

@@ -3334,7 +3337,7 @@ void TemplateTable::fast_xaccess(TosState state) {
__ ld(Rclass_or_obj, 0, R18_locals);

// Constant pool already resolved. Get the field offset.
__ load_field_entry(Rcache, Rscratch, 2);
__ load_field_entry(Rcache, Rscratch, 2, /* for_fast_bytecode */ true);
load_resolved_field_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12

// JVMTI support not needed, since we switch back to single bytecode as soon as debugger attaches.

@@ -3495,7 +3498,7 @@ void TemplateTable::fast_invokevfinal(int byte_no) {

assert(byte_no == f2_byte, "use this argument");
Register Rcache = R31;
__ load_method_entry(Rcache, R11_scratch1);
__ load_method_entry(Rcache, R11_scratch1, 1, /* for_fast_bytecode */ true);
invokevfinal_helper(Rcache, R11_scratch1, R12_scratch2, R22_tmp2, R23_tmp3);
}
@@ -53,7 +53,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);

@@ -55,7 +55,6 @@ define_pd_global(size_t, InitialCodeCacheSize, 2496*K); // Integral multip
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t,MaxRAM, 128ULL*G);
define_pd_global(intx, RegisterCostAreaRatio, 16000);

// Peephole and CISC spilling both break the graph, and so makes the

@@ -1841,6 +1841,15 @@ void InterpreterMacroAssembler::load_method_entry(Register cache, Register index
}

#ifdef ASSERT
void InterpreterMacroAssembler::verify_field_offset(Register reg) {
// Verify the field offset is not in the header, implicitly checks for 0
Label L;
mv(t0, oopDesc::base_offset_in_bytes());
bge(reg, t0, L);
stop("bad field offset");
bind(L);
}

void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag,
const char* msg, bool stop_by_hit) {
Label L;

@@ -300,11 +300,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
void load_field_entry(Register cache, Register index, int bcp_offset = 1);
void load_method_entry(Register cache, Register index, int bcp_offset = 1);

#ifdef ASSERT
void verify_field_offset(Register reg) NOT_DEBUG_RETURN;
void verify_access_flags(Register access_flags, uint32_t flag,
const char* msg, bool stop_by_hit = true);
void verify_frame_setup();
#endif
const char* msg, bool stop_by_hit = true) NOT_DEBUG_RETURN;
void verify_frame_setup() NOT_DEBUG_RETURN;
};

#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP

@ -1092,40 +1092,40 @@ RegMask _NO_SPECIAL_NO_FP_PTR_REG_mask;

void reg_mask_init() {

_ANY_REG32_mask = _ALL_REG32_mask;
_ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg()));
_ANY_REG32_mask.assignFrom(_ALL_REG32_mask);
_ANY_REG32_mask.remove(OptoReg::as_OptoReg(x0->as_VMReg()));

_ANY_REG_mask = _ALL_REG_mask;
_ANY_REG_mask.SUBTRACT(_ZR_REG_mask);
_ANY_REG_mask.assignFrom(_ALL_REG_mask);
_ANY_REG_mask.subtract(_ZR_REG_mask);

_PTR_REG_mask = _ALL_REG_mask;
_PTR_REG_mask.SUBTRACT(_ZR_REG_mask);
_PTR_REG_mask.assignFrom(_ALL_REG_mask);
_PTR_REG_mask.subtract(_ZR_REG_mask);

_NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
_NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
_NO_SPECIAL_REG32_mask.assignFrom(_ALL_REG32_mask);
_NO_SPECIAL_REG32_mask.subtract(_NON_ALLOCATABLE_REG32_mask);

_NO_SPECIAL_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
_NO_SPECIAL_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

_NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
_NO_SPECIAL_PTR_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_PTR_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

// x27 is not allocatable when compressed oops is on
if (UseCompressedOops) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(x27->as_VMReg()));
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(x27->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(x27->as_VMReg()));
}

// x8 is not allocatable when PreserveFramePointer is on
if (PreserveFramePointer) {
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(x8->as_VMReg()));
}

_NO_SPECIAL_NO_FP_PTR_REG_mask = _NO_SPECIAL_PTR_REG_mask;
_NO_SPECIAL_NO_FP_PTR_REG_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
_NO_SPECIAL_NO_FP_PTR_REG_mask.assignFrom(_NO_SPECIAL_PTR_REG_mask);
_NO_SPECIAL_NO_FP_PTR_REG_mask.remove(OptoReg::as_OptoReg(x8->as_VMReg()));
}

void PhaseOutput::pd_perform_mach_node_analysis() {

@ -1326,7 +1326,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
}

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;

int ConstantTable::calculate_table_base_offset() const {
return 0; // absolute addressing, no offset

@ -2104,10 +2104,10 @@ uint Matcher::int_pressure_limit()
// as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip
// derived pointers and lastly fail to spill after reaching maximum
// number of iterations. Lowering the default pressure threshold to
// (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become
// (_NO_SPECIAL_REG32_mask.size() minus 1) forces CallNode to become
// a high register pressure area of the code so that split_DEF can
// generate DefinitionSpillCopy for the derived pointer.
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1;
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.size() - 1;
if (!PreserveFramePointer) {
// When PreserveFramePointer is off, frame pointer is allocatable,
// but different from other SOC registers, it is excluded from

@ -2122,34 +2122,34 @@ uint Matcher::int_pressure_limit()
uint Matcher::float_pressure_limit()
{
// _FLOAT_REG_mask is generated by adlc from the float_reg register class.
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE;
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.size() : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
return false;
}

RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

bool size_fits_all_mem_uses(AddPNode* addp, int shift) {

@ -1073,9 +1073,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
}

// start execution
#ifdef ASSERT
__ verify_frame_setup();
#endif

// jvmti support
__ notify_method_entry();

@ -1541,9 +1539,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
}

// start execution
#ifdef ASSERT
__ verify_frame_setup();
#endif

// jvmti support
__ notify_method_entry();
@ -133,6 +133,7 @@ Address TemplateTable::at_bcp(int offset) {
void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
Register temp_reg, bool load_bc_into_bc_reg /*=true*/,
int byte_no) {
assert_different_registers(bc_reg, temp_reg);
if (!RewriteBytecodes) { return; }
Label L_patch_done;

@ -196,7 +197,11 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
__ bind(L_okay);
#endif

// patch bytecode
// Patch bytecode with release store to coordinate with ResolvedFieldEntry loads
// in fast bytecode codelets. load_field_entry has a memory barrier that gains
// the needed ordering, together with control dependency on entering the fast codelet
// itself.
__ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
__ sb(bc_reg, at_bcp(0));
__ bind(L_patch_done);
}

@ -3028,6 +3033,7 @@ void TemplateTable::fast_storefield(TosState state) {

// X11: field offset, X12: field holder, X13: flags
load_resolved_field_entry(x12, x12, noreg, x11, x13);
__ verify_field_offset(x11);

{
Label notVolatile;

@ -3115,6 +3121,8 @@ void TemplateTable::fast_accessfield(TosState state) {
__ load_field_entry(x12, x11);

__ load_sized_value(x11, Address(x12, in_bytes(ResolvedFieldEntry::field_offset_offset())), sizeof(int), true /*is_signed*/);
__ verify_field_offset(x11);

__ load_unsigned_byte(x13, Address(x12, in_bytes(ResolvedFieldEntry::flags_offset())));

// x10: object

@ -3170,7 +3178,9 @@ void TemplateTable::fast_xaccess(TosState state) {
__ ld(x10, aaddress(0));
// access constant pool cache
__ load_field_entry(x12, x13, 2);

__ load_sized_value(x11, Address(x12, in_bytes(ResolvedFieldEntry::field_offset_offset())), sizeof(int), true /*is_signed*/);
__ verify_field_offset(x11);

// make sure exception is reported in correct bcp range (getfield is
// next instruction)

@ -53,7 +53,6 @@ define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true);
define_pd_global(size_t, NewSizeThreadIncrease, 16*K);
define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(size_t, InitialCodeCacheSize, 160*K);
#endif // !COMPILER2

@ -74,7 +74,6 @@ define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M);
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 128ULL*G);
define_pd_global(size_t, CodeCacheMinBlockLength, 4);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);

@ -356,7 +356,7 @@ instruct g1CompareAndExchangeP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval

__ z_lgr($res$$Register, $oldval$$Register); // previous content

__ z_csg($oldval$$Register, $newval$$Register, 0, $mem_ptr$$reg);
__ z_csg($res$$Register, $newval$$Register, 0, $mem_ptr$$reg);

write_barrier_post(masm, this,
$mem_ptr$$Register /* store_addr */,

@ -35,38 +35,32 @@
// 3 - restoring an old state (javaCalls).

inline void clear(void) {
// No hardware barriers are necessary. All members are volatile and the profiler
// is run from a signal handler and only observes the thread it is running on.

// Clearing _last_Java_sp must be first.
OrderAccess::release();

_last_Java_sp = nullptr;
// Fence?
OrderAccess::fence();

_last_Java_pc = nullptr;
}

inline void set(intptr_t* sp, address pc) {
_last_Java_pc = pc;

OrderAccess::release();
_last_Java_sp = sp;
}

void copy(JavaFrameAnchor* src) {
// In order to make sure the transition state is valid for "this"
// No hardware barriers are necessary. All members are volatile and the profiler
// is run from a signal handler and only observes the thread it is running on.

// we must clear _last_Java_sp before copying the rest of the new data.
// Hack Alert: Temporary bugfix for 4717480/4721647
// To act like previous version (pd_cache_state) don't null _last_Java_sp
// unless the value is changing.
//
if (_last_Java_sp != src->_last_Java_sp) {
OrderAccess::release();
_last_Java_sp = nullptr;
OrderAccess::fence();
}
_last_Java_pc = src->_last_Java_pc;
// Must be last so profiler will always see valid frame if has_last_frame() is true.

OrderAccess::release();
_last_Java_sp = src->_last_Java_sp;
}

@ -80,7 +74,7 @@
intptr_t* last_Java_fp(void) { return nullptr; }

intptr_t* last_Java_sp() const { return _last_Java_sp; }
void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; }
void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; }

address last_Java_pc(void) { return _last_Java_pc; }

@ -1961,22 +1961,22 @@ bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
return _Z_RARG4_INT_REG_mask;
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
return _Z_RARG3_INT_REG_mask;
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
return _Z_RARG4_LONG_REG_mask;
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
return _Z_RARG3_LONG_REG_mask;
}

@ -52,7 +52,6 @@ define_pd_global(size_t, CodeCacheExpansionSize, 32*K );
define_pd_global(size_t, CodeCacheMinBlockLength, 1 );
define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K);
define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(uint64_t, MaxRAM, 1ULL*G);
define_pd_global(bool, CICompileOSR, true );
#endif // !COMPILER2
define_pd_global(bool, UseTypeProfile, false);

@ -52,9 +52,6 @@ define_pd_global(intx, LoopUnrollLimit, 60);
// InitialCodeCacheSize derived from specjbb2000 run.
define_pd_global(size_t, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
define_pd_global(size_t, CodeCacheExpansionSize, 64*K);

// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 128ULL*G);
#else
define_pd_global(intx, InteriorEntryAlignment, 4);
define_pd_global(size_t, NewSizeThreadIncrease, 4*K);

@ -62,9 +59,6 @@ define_pd_global(intx, LoopUnrollLimit, 50); // Design center r
// InitialCodeCacheSize derived from specjbb2000 run.
define_pd_global(size_t, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize
define_pd_global(size_t, CodeCacheExpansionSize, 32*K);

// Ergonomics related flags
define_pd_global(uint64_t, MaxRAM, 4ULL*G);
#endif // AMD64
define_pd_global(intx, RegisterCostAreaRatio, 16000);

@ -471,33 +471,33 @@ void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
// Create mask of caller saved registers that need to
// be saved/restored if live
RegMask caller_saved;
caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(rax->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r8->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r9->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r10->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r11->as_VMReg()));

if (UseAPX) {
caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r16->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r17->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r18->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r19->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r20->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r21->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r22->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r23->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r24->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r25->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r26->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r27->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r28->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r29->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r30->as_VMReg()));
caller_saved.insert(OptoReg::as_OptoReg(r31->as_VMReg()));
}

int gp_spill_size = 0;

@ -511,7 +511,7 @@ void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

if (vm_reg->is_Register()) {
if (caller_saved.Member(opto_reg)) {
if (caller_saved.member(opto_reg)) {
_gp_registers.append(vm_reg->as_Register());
gp_spill_size += 8;
}

@ -41,10 +41,6 @@
do_stub(initial, verify_mxcsr) \
do_arch_entry(x86, initial, verify_mxcsr, verify_mxcsr_entry, \
verify_mxcsr_entry) \
do_stub(initial, get_previous_sp) \
do_arch_entry(x86, initial, get_previous_sp, \
get_previous_sp_entry, \
get_previous_sp_entry) \
do_stub(initial, f2i_fixup) \
do_arch_entry(x86, initial, f2i_fixup, f2i_fixup, f2i_fixup) \
do_stub(initial, f2l_fixup) \

@ -541,22 +541,6 @@ address StubGenerator::generate_orderaccess_fence() {
}

// Support for intptr_t get_previous_sp()
//
// This routine is used to find the previous stack pointer for the
// caller.
address StubGenerator::generate_get_previous_sp() {
StubId stub_id = StubId::stubgen_get_previous_sp_id;
StubCodeMark mark(this, stub_id);
address start = __ pc();

__ movptr(rax, rsp);
__ addptr(rax, 8); // return address is at the top of the stack.
__ ret(0);

return start;
}

//----------------------------------------------------------------------------------------------------
// Support for void verify_mxcsr()
//

@ -4083,8 +4067,6 @@ void StubGenerator::generate_initial_stubs() {
StubRoutines::_catch_exception_entry = generate_catch_exception();

// platform dependent
StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp();

StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();

StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();

@ -68,12 +68,6 @@ class StubGenerator: public StubCodeGenerator {
// Support for intptr_t OrderAccess::fence()
address generate_orderaccess_fence();

// Support for intptr_t get_previous_sp()
//
// This routine is used to find the previous stack pointer for the
// caller.
address generate_get_previous_sp();

//----------------------------------------------------------------------------------------------------
// Support for void verify_mxcsr()
//

@ -393,6 +387,8 @@ class StubGenerator: public StubCodeGenerator {
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, XMMRegister xmm8);
void ghash_last_8_avx2(Register subkeyHtbl);

void check_key_offset(Register key, int offset, int load_size);

// Load key and shuffle operation
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

@ -1759,25 +1759,43 @@ void StubGenerator::roundDeclast(XMMRegister xmm_reg) {
__ vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit);
}

// Check incoming byte offset against the int[] len. key is the pointer to the int[0].
// This check happens often, so it is important for it to be very compact.
void StubGenerator::check_key_offset(Register key, int offset, int load_size) {
#ifdef ASSERT
Address key_length(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT));
assert((offset + load_size) % 4 == 0, "Alignment is good: %d + %d", offset, load_size);
int end_offset = (offset + load_size) / 4;
Label L_good;
__ cmpl(key_length, end_offset);
__ jccb(Assembler::greaterEqual, L_good);
__ hlt();
__ bind(L_good);
#endif
}
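// Worked example of the check above (editorial illustration, not part of the change):
// a round-key load at offset = 14 * 16 with load_size = 16 gives
// end_offset = (224 + 16) / 4 = 60, so the backing int[] must report length >= 60
// (the size of an expanded AES-256 key) or the debug build halts at the hlt().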

// Utility routine for loading a 128-bit key word in little endian format
void StubGenerator::load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask) {
check_key_offset(key, offset, 16);
__ movdqu(xmmdst, Address(key, offset));
__ pshufb(xmmdst, xmm_shuf_mask);
}

void StubGenerator::load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch) {
check_key_offset(key, offset, 16);
__ movdqu(xmmdst, Address(key, offset));
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()), rscratch);
}

void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask) {
check_key_offset(key, offset, 16);
__ movdqu(xmmdst, Address(key, offset));
__ pshufb(xmmdst, xmm_shuf_mask);
__ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
}

void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch) {
check_key_offset(key, offset, 16);
__ movdqu(xmmdst, Address(key, offset));
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()), rscratch);
__ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);

@ -3205,12 +3223,12 @@ void StubGenerator::ghash16_encrypt_parallel16_avx512(Register in, Register out,

//AES round 9
roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);
ev_load_key(AESKEY2, key, 11 * 16, rbx);
//AES rounds up to 11 (AES192) or 13 (AES256)
//AES128 is done
__ cmpl(NROUNDS, 52);
__ jcc(Assembler::less, last_aes_rnd);
__ bind(aes_192);
ev_load_key(AESKEY2, key, 11 * 16, rbx);
roundEncode(AESKEY1, B00_03, B04_07, B08_11, B12_15);
ev_load_key(AESKEY1, key, 12 * 16, rbx);
roundEncode(AESKEY2, B00_03, B04_07, B08_11, B12_15);

@ -497,96 +497,96 @@ void reg_mask_init() {

// _ALL_REG_mask is generated by adlc from the all_reg register class below.
// We derive a number of subsets from it.
_ANY_REG_mask = _ALL_REG_mask;
_ANY_REG_mask.assignFrom(_ALL_REG_mask);

if (PreserveFramePointer) {
_ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
_ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
}
if (need_r12_heapbase()) {
_ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
_ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
_ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
_ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
}

_PTR_REG_mask = _ANY_REG_mask;
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
_PTR_REG_mask.assignFrom(_ANY_REG_mask);
_PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
_PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
_PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
_PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
if (!UseAPX) {
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
_PTR_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
_PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
_PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
}
}

_STACK_OR_PTR_REG_mask = _PTR_REG_mask;
_STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
_STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
_STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

_PTR_REG_NO_RBP_mask = _PTR_REG_mask;
_PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
_PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
_PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));

_PTR_NO_RAX_REG_mask = _PTR_REG_mask;
_PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
_PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
_PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));

_PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
_PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
_PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
_PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
_PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));

_LONG_REG_mask = _PTR_REG_mask;
_STACK_OR_LONG_REG_mask = _LONG_REG_mask;
_STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
_LONG_REG_mask.assignFrom(_PTR_REG_mask);
_STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
_STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

_LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
_LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
_LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
_LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
_LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
_LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
_LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
_LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));

_LONG_NO_RCX_REG_mask = _LONG_REG_mask;
_LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
_LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
_LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
_LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
_LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));

_LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
_LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
_LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
_LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
_LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
_LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));

_INT_REG_mask = _ALL_INT_REG_mask;
_INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
if (!UseAPX) {
for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
_INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
}
}

if (PreserveFramePointer) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
}
if (need_r12_heapbase()) {
_INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
_INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
}

_STACK_OR_INT_REG_mask = _INT_REG_mask;
_STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
_STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
_STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

_INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
_INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
_INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
_INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
_INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));

_INT_NO_RCX_REG_mask = _INT_REG_mask;
_INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
_INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
_INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));

_INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
_INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
_INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
_INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
_INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));

// _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
// from the float_reg_legacy/float_reg_evex register class.
_FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
_FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
}
static bool generate_vzeroupper(Compile* C) {

@ -756,7 +756,7 @@ static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
}

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;

int ConstantTable::calculate_table_base_offset() const {
return 0; // absolute addressing, no offset

@ -1658,7 +1658,7 @@ bool Matcher::is_spillable_arg(int reg)

uint Matcher::int_pressure_limit()
{
return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
}

uint Matcher::float_pressure_limit()

@ -1666,7 +1666,7 @@ uint Matcher::float_pressure_limit()
// After experiment around with different values, the following default threshold
// works best for LCM's register pressure scheduling on x64.
uint dec_count = VM_Version::supports_evex() ? 4 : 2;
uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
}

@ -1678,22 +1678,22 @@ bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
return INT_RAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
return INT_RDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
return LONG_RAX_REG_mask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
return LONG_RDX_REG_mask();
}

@ -245,8 +245,6 @@ void frame::zero_print_on_error(int frame_index,
os::snprintf_checked(fieldbuf, buflen, "word[%d]", offset);
os::snprintf_checked(valuebuf, buflen, PTR_FORMAT, *addr);
zeroframe()->identify_word(frame_index, offset, fieldbuf, valuebuf, buflen);
fieldbuf[buflen - 1] = '\0';
valuebuf[buflen - 1] = '\0';

// Print the result
st->print_cr(" " PTR_FORMAT ": %-21s = %s", p2i(addr), fieldbuf, valuebuf);

@ -116,9 +116,8 @@ void VM_Version::initialize() {
}

// Enable error context decoding on known platforms
#if defined(IA32) || defined(AMD64) || defined(ARM) || \
defined(AARCH64) || defined(PPC) || defined(RISCV) || \
defined(S390)
#if defined(AMD64) || defined(ARM) || defined(AARCH64) || \
defined(PPC) || defined(RISCV) || defined(S390)
if (FLAG_IS_DEFAULT(DecodeErrorContext)) {
FLAG_SET_DEFAULT(DecodeErrorContext, true);
}

@ -1054,7 +1054,7 @@ static void* dll_load_library(const char *filename, int *eno, char *ebuf, int eb
error_report = "dlerror returned no error description";
}
if (ebuf != nullptr && ebuflen > 0) {
os::snprintf_checked(ebuf, ebuflen - 1, "%s, LIBPATH=%s, LD_LIBRARY_PATH=%s : %s",
os::snprintf_checked(ebuf, ebuflen, "%s, LIBPATH=%s, LD_LIBRARY_PATH=%s : %s",
filename, ::getenv("LIBPATH"), ::getenv("LD_LIBRARY_PATH"), error_report);
}
Events::log_dll_message(nullptr, "Loading shared library %s failed, %s", filename, error_report);
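The size change in this hunk matches the snprintf() contract: the size argument is the full capacity of the destination buffer, and at most size - 1 characters plus the terminating NUL are written, so passing ebuflen - 1 gave away one usable character for no benefit (assuming os::snprintf_checked forwards its size argument unchanged, which is not shown here). A minimal stand-alone illustration:

    #include <cstdio>

    int main() {
      char ebuf[8];
      std::snprintf(ebuf, sizeof(ebuf), "%s", "0123456789");
      std::puts(ebuf);   // prints "0123456": 7 characters plus the NUL fit in the 8-byte buffer
      return 0;
    }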

@ -1,5 +1,6 @@
/*
* Copyright (c) 2012, 2024 SAP SE. All rights reserved.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it

@ -1154,7 +1155,7 @@ bool os::pd_dll_unload(void* libhandle, char* ebuf, int ebuflen) {
error_report = "dlerror returned no error description";
}
if (ebuf != nullptr && ebuflen > 0) {
os::snprintf_checked(ebuf, ebuflen - 1, "%s", error_report);
os::snprintf_checked(ebuf, ebuflen, "%s", error_report);
}
assert(false, "os::pd_dll_unload() ::dlclose() failed");
}

@ -46,3 +46,7 @@ uint32_t ZNUMA::memory_id(uintptr_t addr) {
// NUMA support not enabled, assume everything belongs to node zero
return 0;
}

int ZNUMA::numa_id_to_node(uint32_t numa_id) {
ShouldNotCallThis();
}

@ -231,8 +231,6 @@ size_t os::rss() {
// Cpu architecture string
#if defined(ZERO)
static char cpu_arch[] = ZERO_LIBARCH;
#elif defined(IA32)
static char cpu_arch[] = "i386";
#elif defined(AMD64)
static char cpu_arch[] = "amd64";
#elif defined(ARM)

@ -1011,7 +1009,6 @@ bool os::dll_address_to_library_name(address addr, char* buf,
// same architecture as Hotspot is running on

void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebuflen) {
#ifndef IA32
bool ieee_handling = IEEE_subnormal_handling_OK();
if (!ieee_handling) {
Events::log_dll_message(nullptr, "IEEE subnormal handling check failed before loading %s", filename);

@ -1034,14 +1031,9 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebu
// numerical "accuracy", but we need to protect Java semantics first
// and foremost. See JDK-8295159.

// This workaround is ineffective on IA32 systems because the MXCSR
// register (which controls flush-to-zero mode) is not stored in the
// legacy fenv.

fenv_t default_fenv;
int rtn = fegetenv(&default_fenv);
assert(rtn == 0, "fegetenv must succeed");
#endif // IA32

void* result;
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)

@ -1061,7 +1053,6 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebu
} else {
Events::log_dll_message(nullptr, "Loaded shared library %s", filename);
log_info(os)("shared library load of %s was successful", filename);
#ifndef IA32
if (! IEEE_subnormal_handling_OK()) {
// We just dlopen()ed a library that mangled the floating-point
// flags. Silently fix things now.

@ -1086,7 +1077,6 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode, char *ebuf, int ebu
assert(false, "fesetenv didn't work");
}
}
#endif // IA32
}

return result;

@ -1195,9 +1185,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
{EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}
};

#if (defined IA32)
static Elf32_Half running_arch_code=EM_386;
#elif (defined AMD64)
#if (defined AMD64)
static Elf32_Half running_arch_code=EM_X86_64;
#elif (defined __powerpc64__)
static Elf32_Half running_arch_code=EM_PPC64;

@ -1219,7 +1207,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
static Elf32_Half running_arch_code=EM_68K;
#else
#error Method os::dll_load requires that one of following is defined:\
IA32, AMD64, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K
AMD64, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K
#endif

// Identify compatibility class for VM's architecture and library's architecture

@ -2495,7 +2483,7 @@ bool os::pd_dll_unload(void* libhandle, char* ebuf, int ebuflen) {
error_report = "dlerror returned no error description";
}
if (ebuf != nullptr && ebuflen > 0) {
os::snprintf_checked(ebuf, ebuflen - 1, "%s", error_report);
os::snprintf_checked(ebuf, ebuflen, "%s", error_report);
}
}

@ -32,12 +32,35 @@
#include "runtime/os.hpp"
#include "utilities/debug.hpp"

static uint* z_numa_id_to_node = nullptr;
static uint32_t* z_node_to_numa_id = nullptr;

void ZNUMA::pd_initialize() {
_enabled = UseNUMA;

size_t configured_nodes = 0;

if (UseNUMA) {
const size_t max_nodes = os::Linux::numa_num_configured_nodes();
z_numa_id_to_node = NEW_C_HEAP_ARRAY(uint, max_nodes, mtGC);
configured_nodes = os::numa_get_leaf_groups(z_numa_id_to_node, 0);

z_node_to_numa_id = NEW_C_HEAP_ARRAY(uint32_t, max_nodes, mtGC);

// Fill the array with invalid NUMA ids
for (uint32_t i = 0; i < max_nodes; i++) {
z_node_to_numa_id[i] = (uint32_t)-1;
}

// Fill the reverse mappings
for (uint32_t i = 0; i < configured_nodes; i++) {
z_node_to_numa_id[z_numa_id_to_node[i]] = i;
}
}
// UseNUMA and is_faked() are mutually exclusive in zArguments.cpp.
_count = UseNUMA
? os::Linux::numa_max_node() + 1
? configured_nodes
: !FLAG_IS_DEFAULT(ZFakeNUMA)
? ZFakeNUMA
: 1; // No NUMA nodes

@ -54,7 +77,7 @@ uint32_t ZNUMA::id() {
return 0;
}

return os::Linux::get_node_by_cpu(ZCPU::id());
return z_node_to_numa_id[os::Linux::get_node_by_cpu(ZCPU::id())];
}

uint32_t ZNUMA::memory_id(uintptr_t addr) {

@ -63,14 +86,21 @@ uint32_t ZNUMA::memory_id(uintptr_t addr) {
return 0;
}

uint32_t id = (uint32_t)-1;
int node = -1;

if (ZSyscall::get_mempolicy((int*)&id, nullptr, 0, (void*)addr, MPOL_F_NODE | MPOL_F_ADDR) == -1) {
if (ZSyscall::get_mempolicy(&node, nullptr, 0, (void*)addr, MPOL_F_NODE | MPOL_F_ADDR) == -1) {
ZErrno err;
fatal("Failed to get NUMA id for memory at " PTR_FORMAT " (%s)", addr, err.to_string());
}

assert(id < _count, "Invalid NUMA id");
DEBUG_ONLY(const int max_nodes = os::Linux::numa_num_configured_nodes();)
assert(node < max_nodes, "NUMA node is out of bounds node=%d, max=%d", node, max_nodes);

return id;
return z_node_to_numa_id[node];
}

int ZNUMA::numa_id_to_node(uint32_t numa_id) {
assert(numa_id < _count, "NUMA id out of range 0 <= %ud <= %ud", numa_id, _count);

return (int)z_numa_id_to_node[numa_id];
}
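The two arrays introduced in this change keep ZGC's dense internal id space and the kernel's possibly sparse node numbering in sync. A small stand-alone sketch of the same idea in plain C++ (the node numbers are invented for illustration):

    #include <cstdint>

    // Suppose the kernel reports three configured nodes: 0, 2 and 5.
    static const uint32_t kInvalid      = static_cast<uint32_t>(-1);
    static const uint32_t id_to_node[]  = {0, 2, 5};                              // dense numa_id -> node
    static const uint32_t node_to_id[]  = {0, kInvalid, 1, kInvalid, kInvalid, 2}; // node -> numa_id

    // Round-tripping holds for every configured node n and every numa_id i:
    //   id_to_node[node_to_id[n]] == n   and   node_to_id[id_to_node[i]] == i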
|
||||
|
||||
@ -629,7 +629,7 @@ retry:
|
||||
|
||||
size_t ZPhysicalMemoryBacking::commit_numa_preferred(zbacking_offset offset, size_t length, uint32_t numa_id) const {
|
||||
// Setup NUMA policy to allocate memory from a preferred node
|
||||
os::Linux::numa_set_preferred((int)numa_id);
|
||||
os::Linux::numa_set_preferred(ZNUMA::numa_id_to_node(numa_id));
|
||||
|
||||
const size_t committed = commit_default(offset, length);
|
||||
|
||||
|
||||
@ -1795,9 +1795,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
|
||||
{EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"},
|
||||
};
|
||||
|
||||
#if (defined IA32)
|
||||
static Elf32_Half running_arch_code=EM_386;
|
||||
#elif (defined AMD64) || (defined X32)
|
||||
#if (defined AMD64)
|
||||
static Elf32_Half running_arch_code=EM_X86_64;
|
||||
#elif (defined __sparc) && (defined _LP64)
|
||||
static Elf32_Half running_arch_code=EM_SPARCV9;
|
||||
@ -1831,7 +1829,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
|
||||
static Elf32_Half running_arch_code=EM_LOONGARCH;
|
||||
#else
|
||||
#error Method os::dll_load requires that one of following is defined:\
|
||||
AARCH64, ALPHA, ARM, AMD64, IA32, LOONGARCH64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc
|
||||
AARCH64, ALPHA, ARM, AMD64, LOONGARCH64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, RISCV, S390, SH, __sparc
|
||||
#endif
|
||||
|
||||
// Identify compatibility class for VM's architecture and library's architecture
|
||||
@ -1893,7 +1891,6 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
|
||||
}
|
||||
|
||||
void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
|
||||
#ifndef IA32
|
||||
bool ieee_handling = IEEE_subnormal_handling_OK();
|
||||
if (!ieee_handling) {
|
||||
Events::log_dll_message(nullptr, "IEEE subnormal handling check failed before loading %s", filename);
|
||||
@ -1916,14 +1913,9 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
|
||||
// numerical "accuracy", but we need to protect Java semantics first
|
||||
// and foremost. See JDK-8295159.
|
||||
|
||||
// This workaround is ineffective on IA32 systems because the MXCSR
|
||||
// register (which controls flush-to-zero mode) is not stored in the
|
||||
// legacy fenv.
|
||||
|
||||
fenv_t default_fenv;
|
||||
int rtn = fegetenv(&default_fenv);
|
||||
assert(rtn == 0, "fegetenv must succeed");
|
||||
#endif // IA32
|
||||
|
||||
void* result;
|
||||
JFR_ONLY(NativeLibraryLoadEvent load_event(filename, &result);)
|
||||
@ -1943,7 +1935,6 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
|
||||
} else {
|
||||
Events::log_dll_message(nullptr, "Loaded shared library %s", filename);
|
||||
log_info(os)("shared library load of %s was successful", filename);
|
||||
#ifndef IA32
|
||||
// Quickly test to make sure subnormals are correctly handled.
|
||||
if (! IEEE_subnormal_handling_OK()) {
|
||||
// We just dlopen()ed a library that mangled the floating-point flags.
|
||||
@ -1969,7 +1960,6 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf, int ebuflen) {
|
||||
assert(false, "fesetenv didn't work");
|
||||
}
|
||||
}
|
||||
#endif // IA32
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -2613,7 +2603,7 @@ void os::print_memory_info(outputStream* st) {
|
||||
// before "flags" so if we find a second "model name", then the
|
||||
// "flags" field is considered missing.
|
||||
static bool print_model_name_and_flags(outputStream* st, char* buf, size_t buflen) {
|
||||
#if defined(IA32) || defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
// Other platforms have less repetitive cpuinfo files
|
||||
FILE *fp = os::fopen("/proc/cpuinfo", "r");
|
||||
if (fp) {
|
||||
@ -2672,7 +2662,7 @@ static void print_sys_devices_cpu_info(outputStream* st) {
|
||||
}
|
||||
|
||||
// we miss the cpufreq entries on Power and s390x
|
||||
#if defined(IA32) || defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
_print_ascii_file_h("BIOS frequency limitation", "/sys/devices/system/cpu/cpu0/cpufreq/bios_limit", st);
|
||||
_print_ascii_file_h("Frequency switch latency (ns)", "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_transition_latency", st);
|
||||
_print_ascii_file_h("Available cpu frequencies", "/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies", st);
|
||||
@ -2725,7 +2715,7 @@ void os::jfr_report_memory_info() {
|
||||
|
||||
#endif // INCLUDE_JFR
|
||||
|
||||
#if defined(AMD64) || defined(IA32) || defined(X32)
|
||||
#if defined(AMD64)
|
||||
const char* search_string = "model name";
|
||||
#elif defined(M68K)
|
||||
const char* search_string = "CPU";
|
||||
@ -2778,8 +2768,6 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
|
||||
strncpy(cpuinfo, "x86_64", length);
|
||||
#elif defined(ARM) // Order wrt. AARCH64 is relevant!
|
||||
strncpy(cpuinfo, "ARM", length);
|
||||
#elif defined(IA32)
|
||||
strncpy(cpuinfo, "x86_32", length);
|
||||
#elif defined(PPC)
|
||||
strncpy(cpuinfo, "PPC64", length);
|
||||
#elif defined(RISCV)
|
||||
@ -3079,14 +3067,9 @@ int os::Linux::sched_getcpu_syscall(void) {
|
||||
unsigned int cpu = 0;
|
||||
long retval = -1;
|
||||
|
||||
#if defined(IA32)
|
||||
#ifndef SYS_getcpu
|
||||
#define SYS_getcpu 318
|
||||
#endif
|
||||
retval = syscall(SYS_getcpu, &cpu, nullptr, nullptr);
|
||||
#elif defined(AMD64)
|
||||
// Unfortunately we have to bring all these macros here from vsyscall.h
|
||||
// to be able to compile on old linuxes.
|
||||
#if defined(AMD64)
|
||||
// Unfortunately we have to bring all these macros here from vsyscall.h
|
||||
// to be able to compile on old linuxes.
|
||||
#define __NR_vgetcpu 2
|
||||
#define VSYSCALL_START (-10UL << 20)
|
||||
#define VSYSCALL_SIZE 1024
|
||||
@ -4459,87 +4442,6 @@ void os::Linux::disable_numa(const char* reason, bool warning) {
|
||||
FLAG_SET_ERGO(UseNUMAInterleaving, false);
|
||||
}
|
||||
|
||||
#if defined(IA32) && !defined(ZERO)
|
||||
/*
|
||||
* Work-around (execute code at a high address) for broken NX emulation using CS limit,
|
||||
* Red Hat patch "Exec-Shield" (IA32 only).
|
||||
*
|
||||
* Map and execute at a high VA to prevent CS lazy updates race with SMP MM
|
||||
* invalidation.Further code generation by the JVM will no longer cause CS limit
|
||||
* updates.
|
||||
*
|
||||
* Affects IA32: RHEL 5 & 6, Ubuntu 10.04 (LTS), 10.10, 11.04, 11.10, 12.04.
|
||||
* @see JDK-8023956
|
||||
*/
|
||||
static void workaround_expand_exec_shield_cs_limit() {
|
||||
assert(os::Linux::initial_thread_stack_bottom() != nullptr, "sanity");
|
||||
size_t page_size = os::vm_page_size();
|
||||
|
||||
/*
|
||||
* JDK-8197429
|
||||
*
|
||||
* Expand the stack mapping to the end of the initial stack before
|
||||
* attempting to install the codebuf. This is needed because newer
|
||||
* Linux kernels impose a distance of a megabyte between stack
|
||||
* memory and other memory regions. If we try to install the
|
||||
* codebuf before expanding the stack the installation will appear
|
||||
* to succeed but we'll get a segfault later if we expand the stack
|
||||
* in Java code.
|
||||
*
|
||||
*/
|
||||
if (os::is_primordial_thread()) {
|
||||
address limit = os::Linux::initial_thread_stack_bottom();
|
||||
if (! DisablePrimordialThreadGuardPages) {
|
||||
limit += StackOverflow::stack_red_zone_size() +
|
||||
StackOverflow::stack_yellow_zone_size();
|
||||
}
|
||||
os::Linux::expand_stack_to(limit);
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the highest VA the OS will give us and exec
|
||||
*
|
||||
* Although using -(pagesz) as mmap hint works on newer kernel as you would
|
||||
* think, older variants affected by this work-around don't (search forward only).
|
||||
*
|
||||
* On the affected distributions, we understand the memory layout to be:
|
||||
*
|
||||
* TASK_LIMIT= 3G, main stack base close to TASK_LIMT.
|
||||
*
|
||||
* A few pages south main stack will do it.
|
||||
*
|
||||
* If we are embedded in an app other than launcher (initial != main stack),
|
||||
* we don't have much control or understanding of the address space, just let it slide.
|
||||
*/
|
||||
char* hint = (char*)(os::Linux::initial_thread_stack_bottom() -
|
||||
(StackOverflow::stack_guard_zone_size() + page_size));
|
||||
char* codebuf = os::attempt_reserve_memory_at(hint, page_size, mtThread);
|
||||
|
||||
if (codebuf == nullptr) {
|
||||
// JDK-8197429: There may be a stack gap of one megabyte between
|
||||
// the limit of the stack and the nearest memory region: this is a
|
||||
// Linux kernel workaround for CVE-2017-1000364. If we failed to
|
||||
// map our codebuf, try again at an address one megabyte lower.
|
||||
hint -= 1 * M;
|
||||
codebuf = os::attempt_reserve_memory_at(hint, page_size, mtThread);
|
||||
}
|
||||
|
||||
if ((codebuf == nullptr) || (!os::commit_memory(codebuf, page_size, true))) {
|
||||
return; // No matter, we tried, best effort.
|
||||
}
|
||||
|
||||
log_info(os)("[CS limit NX emulation work-around, exec code at: %p]", codebuf);
|
||||
|
||||
// Some code to exec: the 'ret' instruction
|
||||
codebuf[0] = 0xC3;
|
||||
|
||||
// Call the code in the codebuf
|
||||
__asm__ volatile("call *%0" : : "r"(codebuf));
|
||||
|
||||
// keep the page mapped so CS limit isn't reduced.
|
||||
}
|
||||
#endif // defined(IA32) && !defined(ZERO)
|
||||
|
||||
// this is called _after_ the global arguments have been parsed
|
||||
jint os::init_2(void) {
|
||||
|
||||
@ -4560,17 +4462,10 @@ jint os::init_2(void) {
|
||||
return JNI_ERR;
|
||||
}
|
||||
|
||||
#if defined(IA32) && !defined(ZERO)
|
||||
// Need to ensure we've determined the process's initial stack to
|
||||
// perform the workaround
|
||||
Linux::capture_initial_stack(JavaThread::stack_size_at_create());
|
||||
workaround_expand_exec_shield_cs_limit();
|
||||
#else
|
||||
suppress_primordial_thread_resolution = Arguments::created_by_java_launcher();
|
||||
if (!suppress_primordial_thread_resolution) {
|
||||
Linux::capture_initial_stack(JavaThread::stack_size_at_create());
|
||||
}
|
||||
#endif
|
||||
|
||||
Linux::libpthread_init();
|
||||
Linux::sched_getcpu_init();
|
||||
@ -5443,7 +5338,7 @@ bool os::pd_dll_unload(void* libhandle, char* ebuf, int ebuflen) {
|
||||
error_report = "dlerror returned no error description";
|
||||
}
|
||||
if (ebuf != nullptr && ebuflen > 0) {
|
||||
os::snprintf_checked(ebuf, ebuflen - 1, "%s", error_report);
|
||||
os::snprintf_checked(ebuf, ebuflen, "%s", error_report);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -705,11 +705,9 @@ bool SystemProcessInterface::SystemProcesses::ProcessIterator::is_valid_entry(st
|
||||
|
||||
if (atoi(entry->d_name) != 0) {
|
||||
jio_snprintf(buffer, PATH_MAX, "/proc/%s", entry->d_name);
|
||||
buffer[PATH_MAX - 1] = '\0';
|
||||
|
||||
if (is_dir(buffer)) {
|
||||
jio_snprintf(buffer, PATH_MAX, "/proc/%s/stat", entry->d_name);
|
||||
buffer[PATH_MAX - 1] = '\0';
|
||||
if (fsize(buffer, size) != OS_ERR) {
|
||||
return true;
|
||||
}
|
||||
@ -724,7 +722,6 @@ void SystemProcessInterface::SystemProcesses::ProcessIterator::get_exe_name() {
|
||||
char buffer[PATH_MAX];
|
||||
|
||||
jio_snprintf(buffer, PATH_MAX, "/proc/%s/stat", _entry->d_name);
|
||||
buffer[PATH_MAX - 1] = '\0';
|
||||
if ((fp = os::fopen(buffer, "r")) != nullptr) {
|
||||
if (fgets(buffer, PATH_MAX, fp) != nullptr) {
|
||||
char* start, *end;
|
||||
@ -752,7 +749,6 @@ char* SystemProcessInterface::SystemProcesses::ProcessIterator::get_cmdline() {
|
||||
char* cmdline = nullptr;
|
||||
|
||||
jio_snprintf(buffer, PATH_MAX, "/proc/%s/cmdline", _entry->d_name);
|
||||
buffer[PATH_MAX - 1] = '\0';
|
||||
if ((fp = os::fopen(buffer, "r")) != nullptr) {
|
||||
size_t size = 0;
|
||||
char dummy;
|
||||
@ -787,7 +783,6 @@ char* SystemProcessInterface::SystemProcesses::ProcessIterator::get_exe_path() {
|
||||
char buffer[PATH_MAX];
|
||||
|
||||
jio_snprintf(buffer, PATH_MAX, "/proc/%s/exe", _entry->d_name);
|
||||
buffer[PATH_MAX - 1] = '\0';
|
||||
return os::realpath(buffer, _exePath, PATH_MAX);
|
||||
}
|
||||
|
||||
@ -1001,7 +996,6 @@ int64_t NetworkPerformanceInterface::NetworkPerformance::read_counter(const char
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf[num_bytes] = '\0';
|
||||
int64_t value = strtoll(buf, nullptr, 10);
|
||||
|
||||
return value;
|
||||
|
||||
@ -26,6 +26,7 @@
#include "classfile/vmSymbols.hpp"
#include "jvm_io.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp"
#include "nmt/memTracker.hpp"
@ -71,9 +72,7 @@ static char* create_standard_memory(size_t size) {

// commit memory
if (!os::commit_memory(mapAddress, size, !ExecMem)) {
if (PrintMiscellaneous && Verbose) {
warning("Could not commit PerfData memory\n");
}
log_debug(perf)("could not commit PerfData memory");
os::release_memory(mapAddress, size);
return nullptr;
}
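The create_standard_memory hunk above, and the PerfMemory hunks that follow, replace the old PrintMiscellaneous && Verbose guard around warning(...) with Unified Logging on the perf tag. A minimal sketch of the two forms used in this file — the simple log_debug one-liner and the log_is_enabled/LogStreamHandle guard for grouped messages; the helper function and its arguments here are hypothetical:

#include "logging/log.hpp"
#include "logging/logStream.hpp"

// Hypothetical helper showing the two conversion patterns from this diff.
static void report_perf_failure(const char* dirname, int err) {
  // One-shot message: the enablement check is implicit in log_debug.
  log_debug(perf)("could not open directory %s: error = %d", dirname, err);

  // Several related messages behind one explicit guard.
  if (log_is_enabled(Debug, perf)) {
    LogStreamHandle(Debug, perf) log;
    log.print_cr("directory %s is not usable", dirname);
    log.print_cr("reverting to non-shared PerfMemory region");
  }
}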
@ -297,11 +296,12 @@ static DIR *open_directory_secure(const char* dirname) {
|
||||
RESTARTABLE(::open(dirname, O_RDONLY|O_NOFOLLOW), result);
|
||||
if (result == OS_ERR) {
|
||||
// Directory doesn't exist or is a symlink, so there is nothing to cleanup.
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
if (log_is_enabled(Debug, perf)) {
|
||||
LogStreamHandle(Debug, perf) log;
|
||||
if (errno == ELOOP) {
|
||||
warning("directory %s is a symlink and is not secure\n", dirname);
|
||||
log.print_cr("directory %s is a symlink and is not secure", dirname);
|
||||
} else {
|
||||
warning("could not open directory %s: %s\n", dirname, os::strerror(errno));
|
||||
log.print_cr("could not open directory %s: %s", dirname, os::strerror(errno));
|
||||
}
|
||||
}
|
||||
return dirp;
|
||||
@ -371,9 +371,7 @@ static DIR *open_directory_secure_cwd(const char* dirname, int *saved_cwd_fd) {
|
||||
// handle errors, otherwise shared memory files will be created in cwd.
|
||||
result = fchdir(fd);
|
||||
if (result == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not change to directory %s", dirname);
|
||||
}
|
||||
log_debug(perf)("could not change to directory %s", dirname);
|
||||
if (*saved_cwd_fd != -1) {
|
||||
::close(*saved_cwd_fd);
|
||||
*saved_cwd_fd = -1;
|
||||
@ -411,16 +409,12 @@ static bool is_file_secure(int fd, const char *filename) {
|
||||
// Determine if the file is secure.
|
||||
RESTARTABLE(::fstat(fd, &statbuf), result);
|
||||
if (result == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("fstat failed on %s: %s\n", filename, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("fstat failed on %s: %s", filename, os::strerror(errno));
|
||||
return false;
|
||||
}
|
||||
if (statbuf.st_nlink > 1) {
|
||||
// A file with multiple links is not expected.
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("file %s has multiple links\n", filename);
|
||||
}
|
||||
log_debug(perf)("file %s has multiple links", filename);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -447,10 +441,10 @@ static char* get_user_name(uid_t uid) {
|
||||
int result = getpwuid_r(uid, &pwent, pwbuf, (size_t)bufsize, &p);
|
||||
|
||||
if (result != 0 || p == nullptr || p->pw_name == nullptr || *(p->pw_name) == '\0') {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
if (log_is_enabled(Debug, perf)) {
|
||||
LogStreamHandle(Debug, perf) log;
|
||||
if (result != 0) {
|
||||
warning("Could not retrieve passwd entry: %s\n",
|
||||
os::strerror(result));
|
||||
log.print_cr("Could not retrieve passwd entry: %s", os::strerror(result));
|
||||
}
|
||||
else if (p == nullptr) {
|
||||
// this check is added to protect against an observed problem
|
||||
@ -463,13 +457,11 @@ static char* get_user_name(uid_t uid) {
|
||||
// message may result in an erroneous message.
|
||||
// Bug Id 89052 was opened with RedHat.
|
||||
//
|
||||
warning("Could not retrieve passwd entry: %s\n",
|
||||
os::strerror(errno));
|
||||
log.print_cr("Could not retrieve passwd entry: %s", os::strerror(errno));
|
||||
}
|
||||
else {
|
||||
warning("Could not determine user name: %s\n",
|
||||
p->pw_name == nullptr ? "pw_name = null" :
|
||||
"pw_name zero length");
|
||||
log.print_cr("Could not determine user name: %s",
|
||||
p->pw_name == nullptr ? "pw_name = null" : "pw_name zero length");
|
||||
}
|
||||
}
|
||||
FREE_C_HEAP_ARRAY(char, pwbuf);
|
||||
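The get_user_name() hunk above keeps the usual getpwuid_r acceptance checks (non-zero result, null entry, empty pw_name) and only changes how failures are reported. For reference, the same lookup pattern as a self-contained POSIX sketch — this is not HotSpot code, and the sysconf-based buffer sizing is an assumption of the sketch:

#include <pwd.h>
#include <sys/types.h>
#include <unistd.h>
#include <string>
#include <vector>

// Self-contained sketch of the getpwuid_r pattern used above.
std::string user_name_for(uid_t uid) {
  long bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
  if (bufsize <= 0) {
    bufsize = 1024;  // no hard limit reported; fall back to a reasonable default
  }
  std::vector<char> buf(static_cast<size_t>(bufsize));
  struct passwd pwent;
  struct passwd* p = nullptr;
  int result = getpwuid_r(uid, &pwent, buf.data(), buf.size(), &p);
  // Same acceptance test as the code above: a non-zero result, a null entry,
  // or an empty pw_name all mean "no usable user name".
  if (result != 0 || p == nullptr || p->pw_name == nullptr || *(p->pw_name) == '\0') {
    return std::string();
  }
  return std::string(p->pw_name);
}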
@ -680,10 +672,10 @@ static void remove_file(const char* path) {
|
||||
// maliciously planted, the directory's presence won't hurt anything.
|
||||
//
|
||||
RESTARTABLE(::unlink(path), result);
|
||||
if (PrintMiscellaneous && Verbose && result == OS_ERR) {
|
||||
if (log_is_enabled(Debug, perf) && result == OS_ERR) {
|
||||
if (errno != ENOENT) {
|
||||
warning("Could not unlink shared memory backing"
|
||||
" store file %s : %s\n", path, os::strerror(errno));
|
||||
log_debug(perf)("could not unlink shared memory backing store file %s : %s",
|
||||
path, os::strerror(errno));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -819,23 +811,16 @@ static bool make_user_tmp_dir(const char* dirname) {
|
||||
// The directory already exists and was probably created by another
|
||||
// JVM instance. However, this could also be the result of a
|
||||
// deliberate symlink. Verify that the existing directory is safe.
|
||||
//
|
||||
if (!is_directory_secure(dirname)) {
|
||||
// directory is not secure
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("%s directory is insecure\n", dirname);
|
||||
}
|
||||
log_debug(perf)("%s directory is insecure", dirname);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// we encountered some other failure while attempting
|
||||
// to create the directory
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not create directory %s: %s\n",
|
||||
dirname, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not create directory %s: %s", dirname, os::strerror(errno));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -872,11 +857,12 @@ static int create_sharedmem_file(const char* dirname, const char* filename, size
|
||||
int fd;
|
||||
RESTARTABLE(os::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IRUSR|S_IWUSR), fd);
|
||||
if (fd == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
if (log_is_enabled(Debug, perf)) {
|
||||
LogStreamHandle(Debug, perf) log;
|
||||
if (errno == ELOOP) {
|
||||
warning("file %s is a symlink and is not secure\n", filename);
|
||||
log.print_cr("file %s is a symlink and is not secure", filename);
|
||||
} else {
|
||||
warning("could not create file %s: %s\n", filename, os::strerror(errno));
|
||||
log.print_cr("could not create file %s: %s", filename, os::strerror(errno));
|
||||
}
|
||||
}
|
||||
// close the directory and reset the current working directory
|
||||
@ -924,18 +910,14 @@ static int create_sharedmem_file(const char* dirname, const char* filename, size
|
||||
// truncate the file to get rid of any existing data
|
||||
RESTARTABLE(::ftruncate(fd, (off_t)0), result);
|
||||
if (result == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not truncate shared memory file: %s\n", os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not truncate shared memory file: %s", os::strerror(errno));
|
||||
::close(fd);
|
||||
return -1;
|
||||
}
|
||||
// set the file size
|
||||
RESTARTABLE(::ftruncate(fd, (off_t)size), result);
|
||||
if (result == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not set shared memory file size: %s\n", os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not set shared memory file size: %s", os::strerror(errno));
|
||||
::close(fd);
|
||||
return -1;
|
||||
}
|
||||
@ -1057,9 +1039,7 @@ static char* mmap_create_shared(size_t size) {
|
||||
assert(result != OS_ERR, "could not close file");
|
||||
|
||||
if (mapAddress == MAP_FAILED) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("mmap failed - %s\n", os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("mmap failed - %s", os::strerror(errno));
|
||||
remove_file(filename);
|
||||
FREE_C_HEAP_ARRAY(char, filename);
|
||||
return nullptr;
|
||||
@ -1135,9 +1115,7 @@ static size_t sharedmem_filesize(int fd, TRAPS) {
|
||||
|
||||
RESTARTABLE(::fstat(fd, &statbuf), result);
|
||||
if (result == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("fstat failed: %s\n", os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("fstat failed: %s", os::strerror(errno));
|
||||
THROW_MSG_0(vmSymbols::java_io_IOException(),
|
||||
"Could not determine PerfMemory size");
|
||||
}
|
||||
@ -1212,9 +1190,7 @@ static void mmap_attach_shared(int vmid, char** addr, size_t* sizep, TRAPS) {
|
||||
assert(result != OS_ERR, "could not close file");
|
||||
|
||||
if (mapAddress == MAP_FAILED) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("mmap failed: %s\n", os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("mmap failed: %s", os::strerror(errno));
|
||||
THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(),
|
||||
"Could not map PerfMemory");
|
||||
}
|
||||
@ -1244,13 +1220,9 @@ void PerfMemory::create_memory_region(size_t size) {
|
||||
else {
|
||||
_start = create_shared_memory(size);
|
||||
if (_start == nullptr) {
|
||||
|
||||
// creation of the shared memory region failed, attempt
|
||||
// to create a contiguous, non-shared memory region instead.
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Reverting to non-shared PerfMemory region.\n");
|
||||
}
|
||||
log_debug(perf)("Reverting to non-shared PerfMemory region.");
|
||||
FLAG_SET_ERGO(PerfDisableSharedMem, true);
|
||||
_start = create_standard_memory(size);
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@
#include "signals_posix.hpp"
#include "suspendResume_posix.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/deferredStatic.hpp"
#include "utilities/events.hpp"
#include "utilities/ostream.hpp"
#include "utilities/parseInteger.hpp"
@ -167,9 +168,9 @@ static get_signal_t get_signal_action = nullptr;

// suspend/resume support
#if defined(__APPLE__)
static OSXSemaphore sr_semaphore;
static DeferredStatic<OSXSemaphore> sr_semaphore;
#else
static PosixSemaphore sr_semaphore;
static DeferredStatic<PosixSemaphore> sr_semaphore;
#endif

// Signal number used to suspend/resume a thread
@ -177,7 +178,7 @@ static get_signal_t get_signal_action = nullptr;
int PosixSignals::SR_signum = SIGUSR2;

// sun.misc.Signal support
static Semaphore* sig_semaphore = nullptr;
static DeferredStatic<Semaphore> sig_semaphore;
// a counter for each possible signal value
static volatile jint pending_signals[NSIG+1] = { 0 };

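The declarations above switch the suspend/resume and sun.misc.Signal semaphores from objects that need static construction (or a late heap allocation) to DeferredStatic<T>, which only reserves storage at static-initialization time and constructs the object explicitly during VM startup. A rough sketch of how such a wrapper can be built; this is an illustration under that assumption, not the actual utilities/deferredStatic.hpp implementation:

#include <new>      // placement new
#include <utility>  // std::forward

// Illustrative deferred-initialization wrapper: storage exists from the start,
// but the wrapped object is only constructed when initialize() is called
// (e.g. from SR_initialize() or jdk_misc_signal_init()).
template <typename T>
class DeferredStaticSketch {
  alignas(T) unsigned char _storage[sizeof(T)];
 public:
  template <typename... Args>
  void initialize(Args&&... args) {
    ::new (static_cast<void*>(_storage)) T(std::forward<Args>(args)...);
  }
  T* get()        { return reinterpret_cast<T*>(_storage); }
  T* operator->() { return get(); }
  T& operator*()  { return *get(); }
};

// Usage mirroring the diff: declare at namespace scope, initialize once,
// then use '->' instead of '.' at the call sites.
//   static DeferredStaticSketch<PosixSemaphore> sr_semaphore;
//   sr_semaphore.initialize(0);   // in SR_initialize()
//   sr_semaphore->signal();       // in SR_handler()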
@ -351,17 +352,16 @@ static void jdk_misc_signal_init() {
|
||||
::memset((void*)pending_signals, 0, sizeof(pending_signals));
|
||||
|
||||
// Initialize signal semaphore
|
||||
sig_semaphore = new Semaphore();
|
||||
int sem_count = 0;
|
||||
sig_semaphore.initialize(sem_count);
|
||||
}
|
||||
|
||||
void os::signal_notify(int sig) {
|
||||
if (sig_semaphore != nullptr) {
|
||||
// Signal thread is not created with ReduceSignalUsage and jdk_misc_signal_init
|
||||
// initialization isn't called.
|
||||
if (!ReduceSignalUsage) {
|
||||
AtomicAccess::inc(&pending_signals[sig]);
|
||||
sig_semaphore->signal();
|
||||
} else {
|
||||
// Signal thread is not created with ReduceSignalUsage and jdk_misc_signal_init
|
||||
// initialization isn't called.
|
||||
assert(ReduceSignalUsage, "signal semaphore should be created");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1696,7 +1696,7 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
|
||||
pthread_sigmask(SIG_BLOCK, nullptr, &suspend_set);
|
||||
sigdelset(&suspend_set, PosixSignals::SR_signum);
|
||||
|
||||
sr_semaphore.signal();
|
||||
sr_semaphore->signal();
|
||||
|
||||
// wait here until we are resumed
|
||||
while (1) {
|
||||
@ -1705,7 +1705,7 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
|
||||
SuspendResume::State result = osthread->sr.running();
|
||||
if (result == SuspendResume::SR_RUNNING) {
|
||||
// double check AIX doesn't need this!
|
||||
sr_semaphore.signal();
|
||||
sr_semaphore->signal();
|
||||
break;
|
||||
} else if (result != SuspendResume::SR_SUSPENDED) {
|
||||
ShouldNotReachHere();
|
||||
@ -1731,6 +1731,9 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
|
||||
}
|
||||
|
||||
static int SR_initialize() {
|
||||
int sem_count = 0;
|
||||
sr_semaphore.initialize(sem_count);
|
||||
|
||||
struct sigaction act;
|
||||
char *s;
|
||||
// Get signal number to use for suspend/resume
|
||||
@ -1778,7 +1781,7 @@ static int sr_notify(OSThread* osthread) {
|
||||
// but this seems the normal response to library errors
|
||||
bool PosixSignals::do_suspend(OSThread* osthread) {
|
||||
assert(osthread->sr.is_running(), "thread should be running");
|
||||
assert(!sr_semaphore.trywait(), "semaphore has invalid state");
|
||||
assert(!sr_semaphore->trywait(), "semaphore has invalid state");
|
||||
|
||||
// mark as suspended and send signal
|
||||
if (osthread->sr.request_suspend() != SuspendResume::SR_SUSPEND_REQUEST) {
|
||||
@ -1793,7 +1796,7 @@ bool PosixSignals::do_suspend(OSThread* osthread) {
|
||||
|
||||
// managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED
|
||||
while (true) {
|
||||
if (sr_semaphore.timedwait(2)) {
|
||||
if (sr_semaphore->timedwait(2)) {
|
||||
break;
|
||||
} else {
|
||||
// timeout
|
||||
@ -1802,7 +1805,7 @@ bool PosixSignals::do_suspend(OSThread* osthread) {
|
||||
return false;
|
||||
} else if (cancelled == SuspendResume::SR_SUSPENDED) {
|
||||
// make sure that we consume the signal on the semaphore as well
|
||||
sr_semaphore.wait();
|
||||
sr_semaphore->wait();
|
||||
break;
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
@ -1817,7 +1820,7 @@ bool PosixSignals::do_suspend(OSThread* osthread) {
|
||||
|
||||
void PosixSignals::do_resume(OSThread* osthread) {
|
||||
assert(osthread->sr.is_suspended(), "thread should be suspended");
|
||||
assert(!sr_semaphore.trywait(), "invalid semaphore state");
|
||||
assert(!sr_semaphore->trywait(), "invalid semaphore state");
|
||||
|
||||
if (osthread->sr.request_wakeup() != SuspendResume::SR_WAKEUP_REQUEST) {
|
||||
// failed to switch to WAKEUP_REQUEST
|
||||
@ -1827,7 +1830,7 @@ void PosixSignals::do_resume(OSThread* osthread) {
|
||||
|
||||
while (true) {
|
||||
if (sr_notify(osthread) == 0) {
|
||||
if (sr_semaphore.timedwait(2)) {
|
||||
if (sr_semaphore->timedwait(2)) {
|
||||
if (osthread->sr.is_running()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -46,3 +46,7 @@ uint32_t ZNUMA::memory_id(uintptr_t addr) {
|
||||
// NUMA support not enabled, assume everything belongs to node zero
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ZNUMA::numa_id_to_node(uint32_t numa_id) {
|
||||
ShouldNotCallThis();
|
||||
}
|
||||
|
||||
@ -1827,12 +1827,12 @@ void * os::dll_load(const char *name, char *ebuf, int ebuflen) {
|
||||
}
|
||||
|
||||
if (lib_arch_str != nullptr) {
|
||||
os::snprintf_checked(ebuf, ebuflen - 1,
|
||||
os::snprintf_checked(ebuf, ebuflen,
|
||||
"Can't load %s-bit .dll on a %s-bit platform",
|
||||
lib_arch_str, running_arch_str);
|
||||
} else {
|
||||
// don't know what architecture this dll was built for
|
||||
os::snprintf_checked(ebuf, ebuflen - 1,
|
||||
os::snprintf_checked(ebuf, ebuflen,
|
||||
"Can't load this .dll (machine code=0x%x) on a %s-bit platform",
|
||||
lib_arch, running_arch_str);
|
||||
}
|
||||
@ -3150,7 +3150,6 @@ void os::large_page_init() {
|
||||
_large_page_size = os::win32::large_page_init_decide_size();
|
||||
const size_t default_page_size = os::vm_page_size();
|
||||
if (_large_page_size > default_page_size) {
|
||||
#if !defined(IA32)
|
||||
if (EnableAllLargePageSizesForWindows) {
|
||||
size_t min_size = GetLargePageMinimum();
|
||||
|
||||
@ -3159,7 +3158,6 @@ void os::large_page_init() {
|
||||
_page_sizes.add(page_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
_page_sizes.add(_large_page_size);
|
||||
}
|
||||
@ -4162,11 +4160,6 @@ void os::win32::initialize_system_info() {
|
||||
}
|
||||
_physical_memory = static_cast<physical_memory_size_type>(ms.ullTotalPhys);
|
||||
|
||||
if (FLAG_IS_DEFAULT(MaxRAM)) {
|
||||
// Adjust MaxRAM according to the maximum virtual address space available.
|
||||
FLAG_SET_DEFAULT(MaxRAM, MIN2(MaxRAM, (uint64_t) ms.ullTotalVirtual));
|
||||
}
|
||||
|
||||
_is_windows_server = IsWindowsServer();
|
||||
|
||||
initialize_performance_counter();
|
||||
@ -6259,3 +6252,106 @@ const void* os::get_saved_assert_context(const void** sigInfo) {
|
||||
*sigInfo = nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/*
* Windows/x64 does not use stack frames the way expected by Java:
* [1] in most cases, there is no frame pointer. All locals are addressed via RSP
* [2] in rare cases, when alloca() is used, a frame pointer is used, but this may
* not be RBP.
* See http://msdn.microsoft.com/en-us/library/ew5tede7.aspx
*
* So it's not possible to print the native stack using the
* while (...) {... fr = os::get_sender_for_C_frame(&fr); }
* loop in vmError.cpp. We need to roll our own loop.
* This approach works for Windows AArch64 as well.
*/
bool os::win32::platform_print_native_stack(outputStream* st, const void* context,
|
||||
char* buf, int buf_size, address& lastpc)
|
||||
{
|
||||
CONTEXT ctx;
|
||||
if (context != nullptr) {
|
||||
memcpy(&ctx, context, sizeof(ctx));
|
||||
} else {
|
||||
RtlCaptureContext(&ctx);
|
||||
}
|
||||
|
||||
st->print_cr("Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)");
|
||||
|
||||
DWORD machine_type;
|
||||
STACKFRAME stk;
|
||||
memset(&stk, 0, sizeof(stk));
|
||||
stk.AddrStack.Mode = AddrModeFlat;
|
||||
stk.AddrFrame.Mode = AddrModeFlat;
|
||||
stk.AddrPC.Mode = AddrModeFlat;
|
||||
|
||||
#if defined(_M_AMD64)
|
||||
stk.AddrStack.Offset = ctx.Rsp;
|
||||
stk.AddrFrame.Offset = ctx.Rbp;
|
||||
stk.AddrPC.Offset = ctx.Rip;
|
||||
machine_type = IMAGE_FILE_MACHINE_AMD64;
|
||||
#elif defined(_M_ARM64)
|
||||
stk.AddrStack.Offset = ctx.Sp;
|
||||
stk.AddrFrame.Offset = ctx.Fp;
|
||||
stk.AddrPC.Offset = ctx.Pc;
|
||||
machine_type = IMAGE_FILE_MACHINE_ARM64;
|
||||
#else
|
||||
#error unknown architecture
|
||||
#endif
|
||||
|
||||
// Ensure we consider dynamically loaded DLLs
|
||||
SymbolEngine::refreshModuleList();
|
||||
|
||||
int count = 0;
|
||||
address lastpc_internal = 0;
|
||||
while (count++ < StackPrintLimit) {
|
||||
intptr_t* sp = (intptr_t*)stk.AddrStack.Offset;
|
||||
intptr_t* fp = (intptr_t*)stk.AddrFrame.Offset; // NOT necessarily the same as ctx.Rbp!
|
||||
address pc = (address)stk.AddrPC.Offset;
|
||||
|
||||
if (pc != nullptr) {
|
||||
if (count == 2 && lastpc_internal == pc) {
|
||||
// Skip it -- StackWalk64() may return the same PC
|
||||
// (but different SP) on the first try.
|
||||
} else {
|
||||
// Don't try to create a frame(sp, fp, pc) -- on WinX64, stk.AddrFrame
|
||||
// may not contain what Java expects, and may cause the frame() constructor
|
||||
// to crash. Let's just print out the symbolic address.
|
||||
frame::print_C_frame(st, buf, buf_size, pc);
|
||||
// print source file and line, if available
|
||||
char buf[128];
|
||||
int line_no;
|
||||
if (SymbolEngine::get_source_info(pc, buf, sizeof(buf), &line_no)) {
|
||||
st->print(" (%s:%d)", buf, line_no);
|
||||
} else {
|
||||
st->print(" (no source info available)");
|
||||
}
|
||||
st->cr();
|
||||
}
|
||||
lastpc_internal = pc;
|
||||
}
|
||||
|
||||
PVOID p = WindowsDbgHelp::symFunctionTableAccess64(GetCurrentProcess(), stk.AddrPC.Offset);
|
||||
if (p == nullptr) {
|
||||
// StackWalk64() can't handle this PC. Calling StackWalk64 again may cause crash.
|
||||
lastpc = lastpc_internal;
|
||||
break;
|
||||
}
|
||||
|
||||
BOOL result = WindowsDbgHelp::stackWalk64(
|
||||
machine_type, // __in DWORD MachineType,
|
||||
GetCurrentProcess(), // __in HANDLE hProcess,
|
||||
GetCurrentThread(), // __in HANDLE hThread,
|
||||
&stk, // __inout LP STACKFRAME64 StackFrame,
|
||||
&ctx); // __inout PVOID ContextRecord,
|
||||
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (count > StackPrintLimit) {
|
||||
st->print_cr("...<more frames>...");
|
||||
}
|
||||
st->cr();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
|
||||
#include "classfile/vmSymbols.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "logging/logStream.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "nmt/memTracker.hpp"
|
||||
@ -41,11 +42,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <lmcons.h>
|
||||
|
||||
typedef BOOL (WINAPI *SetSecurityDescriptorControlFnPtr)(
|
||||
IN PSECURITY_DESCRIPTOR pSecurityDescriptor,
|
||||
IN SECURITY_DESCRIPTOR_CONTROL ControlBitsOfInterest,
|
||||
IN SECURITY_DESCRIPTOR_CONTROL ControlBitsToSet);
|
||||
#include <securitybaseapi.h>
|
||||
|
||||
// Standard Memory Implementation Details
|
||||
|
||||
@ -62,9 +59,7 @@ static char* create_standard_memory(size_t size) {
|
||||
|
||||
// commit memory
|
||||
if (!os::commit_memory(mapAddress, size, !ExecMem)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Could not commit PerfData memory\n");
|
||||
}
|
||||
log_debug(perf)("could not commit PerfData memory");
|
||||
os::release_memory(mapAddress, size);
|
||||
return nullptr;
|
||||
}
|
||||
@ -90,25 +85,21 @@ static void delete_standard_memory(char* addr, size_t size) {
|
||||
static void save_memory_to_file(char* addr, size_t size) {
|
||||
|
||||
const char* destfile = PerfMemory::get_perfdata_file_path();
|
||||
assert(destfile[0] != '\0', "invalid Perfdata file path");
|
||||
assert(destfile[0] != '\0', "invalid PerfData file path");
|
||||
|
||||
int fd = ::_open(destfile, _O_BINARY|_O_CREAT|_O_WRONLY|_O_TRUNC,
|
||||
_S_IREAD|_S_IWRITE);
|
||||
|
||||
if (fd == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Could not create Perfdata save file: %s: %s\n",
|
||||
destfile, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not create PerfData save file: %s: %s",
|
||||
destfile, os::strerror(errno));
|
||||
} else {
|
||||
for (size_t remaining = size; remaining > 0;) {
|
||||
|
||||
int nbytes = ::_write(fd, addr, (unsigned int)remaining);
|
||||
if (nbytes == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Could not write Perfdata save file: %s: %s\n",
|
||||
destfile, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not write PerfData save file: %s: %s",
|
||||
destfile, os::strerror(errno));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -117,10 +108,8 @@ static void save_memory_to_file(char* addr, size_t size) {
|
||||
}
|
||||
|
||||
int result = ::_close(fd);
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
if (result == OS_ERR) {
|
||||
warning("Could not close %s: %s\n", destfile, os::strerror(errno));
|
||||
}
|
||||
if (result == OS_ERR) {
|
||||
log_debug(perf)("could not close %s: %s", destfile, os::strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
@ -220,10 +209,8 @@ static bool is_directory_secure(const char* path) {
|
||||
}
|
||||
else {
|
||||
// unexpected error, declare the path insecure
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not get attributes for file %s: "
|
||||
" lasterror = %d\n", path, lasterror);
|
||||
}
|
||||
log_debug(perf)("could not get attributes for file %s: lasterror = %d",
|
||||
path, lasterror);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -234,9 +221,7 @@ static bool is_directory_secure(const char* path) {
|
||||
// as some types of reparse points might be acceptable, but it
|
||||
// is probably more secure to avoid these conditions.
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("%s is a reparse point\n", path);
|
||||
}
|
||||
log_debug(perf)("%s is a reparse point", path);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -253,10 +238,8 @@ static bool is_directory_secure(const char* path) {
|
||||
// this is either a regular file or some other type of file,
|
||||
// any of which are unexpected and therefore insecure.
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("%s is not a directory, file attributes = "
|
||||
INTPTR_FORMAT "\n", path, fa);
|
||||
}
|
||||
log_debug(perf)("%s is not a directory, file attributes : "
|
||||
INTPTR_FORMAT, path, fa);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -492,11 +475,9 @@ static void remove_file(const char* dirname, const char* filename) {
|
||||
strcat(path, filename);
|
||||
|
||||
if (::unlink(path) == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
if (errno != ENOENT) {
|
||||
warning("Could not unlink shared memory backing"
|
||||
" store file %s : %s\n", path, os::strerror(errno));
|
||||
}
|
||||
if (errno != ENOENT) {
|
||||
log_debug(perf)("could not unlink shared memory backing store file %s : %s",
|
||||
path, os::strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
@ -515,20 +496,16 @@ static bool is_alive(int pid) {
|
||||
HANDLE ph = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid);
|
||||
if (ph == nullptr) {
|
||||
// the process does not exist.
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
DWORD lastError = GetLastError();
|
||||
if (lastError != ERROR_INVALID_PARAMETER) {
|
||||
warning("OpenProcess failed: %d\n", GetLastError());
|
||||
}
|
||||
DWORD lastError = GetLastError();
|
||||
if (lastError != ERROR_INVALID_PARAMETER) {
|
||||
log_debug(perf)("OpenProcess failed: %d", lastError);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
DWORD exit_status;
|
||||
if (!GetExitCodeProcess(ph, &exit_status)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetExitCodeProcess failed: %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("GetExitCodeProcess failed: %d", GetLastError());
|
||||
CloseHandle(ph);
|
||||
return false;
|
||||
}
|
||||
@ -545,17 +522,13 @@ static bool is_filesystem_secure(const char* path) {
|
||||
char fs_type[MAX_PATH];
|
||||
|
||||
if (PerfBypassFileSystemCheck) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("bypassing file system criteria checks for %s\n", path);
|
||||
}
|
||||
log_debug(perf)("bypassing file system criteria checks for %s", path);
|
||||
return true;
|
||||
}
|
||||
|
||||
char* first_colon = strchr((char *)path, ':');
|
||||
if (first_colon == nullptr) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("expected device specifier in path: %s\n", path);
|
||||
}
|
||||
log_debug(perf)("expected device specifier in path: %s", path);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -576,29 +549,22 @@ static bool is_filesystem_secure(const char* path) {
|
||||
if (!GetVolumeInformation(root_path, nullptr, 0, nullptr, &maxpath,
|
||||
&flags, fs_type, MAX_PATH)) {
|
||||
// we can't get information about the volume, so assume unsafe.
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not get device information for %s: "
|
||||
" path = %s: lasterror = %d\n",
|
||||
root_path, path, GetLastError());
|
||||
}
|
||||
log_debug(perf)("could not get device information for %s: path = %s: lasterror = %d",
|
||||
root_path, path, GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((flags & FS_PERSISTENT_ACLS) == 0) {
|
||||
// file system doesn't support ACLs, declare file system unsafe
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("file system type %s on device %s does not support"
|
||||
" ACLs\n", fs_type, root_path);
|
||||
}
|
||||
log_debug(perf)("file system type %s on device %s does not support ACLs",
|
||||
fs_type, root_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((flags & FS_VOL_IS_COMPRESSED) != 0) {
|
||||
// file system is compressed, declare file system unsafe
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("file system type %s on device %s is compressed\n",
|
||||
fs_type, root_path);
|
||||
}
|
||||
log_debug(perf)("file system type %s on device %s is compressed",
|
||||
fs_type, root_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -704,9 +670,7 @@ static HANDLE create_file_mapping(const char* name, HANDLE fh, LPSECURITY_ATTRIB
|
||||
name); /* LPCTSTR name for object */
|
||||
|
||||
if (fmh == nullptr) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("CreateFileMapping failed, lasterror = %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("CreateFileMapping failed, lasterror = %d", GetLastError());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -717,9 +681,7 @@ static HANDLE create_file_mapping(const char* name, HANDLE fh, LPSECURITY_ATTRIB
|
||||
// the other processes either exit or close their mapping objects
|
||||
// and/or mapped views of this mapping object.
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("file mapping already exists, lasterror = %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("file mapping already exists, lasterror = %d", GetLastError());
|
||||
|
||||
CloseHandle(fmh);
|
||||
return nullptr;
|
||||
@ -783,9 +745,7 @@ static PSID get_user_sid(HANDLE hProcess) {
|
||||
|
||||
// get the process token
|
||||
if (!OpenProcessToken(hProcess, TOKEN_READ, &hAccessToken)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("OpenProcessToken failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("OpenProcessToken failure: lasterror = %d", GetLastError());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -795,10 +755,8 @@ static PSID get_user_sid(HANDLE hProcess) {
|
||||
if (!GetTokenInformation(hAccessToken, TokenUser, nullptr, rsize, &rsize)) {
|
||||
DWORD lasterror = GetLastError();
|
||||
if (lasterror != ERROR_INSUFFICIENT_BUFFER) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetTokenInformation failure: lasterror = %d,"
|
||||
" rsize = %d\n", lasterror, rsize);
|
||||
}
|
||||
log_debug(perf)("GetTokenInformation failure: lasterror = %d, rsize = %d",
|
||||
lasterror, rsize);
|
||||
CloseHandle(hAccessToken);
|
||||
return nullptr;
|
||||
}
|
||||
@ -808,10 +766,8 @@ static PSID get_user_sid(HANDLE hProcess) {
|
||||
|
||||
// get the user token information
|
||||
if (!GetTokenInformation(hAccessToken, TokenUser, token_buf, rsize, &rsize)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetTokenInformation failure: lasterror = %d,"
|
||||
" rsize = %d\n", GetLastError(), rsize);
|
||||
}
|
||||
log_debug(perf)("GetTokenInformation failure: lasterror = %d, rsize = %d",
|
||||
GetLastError(), rsize);
|
||||
FREE_C_HEAP_ARRAY(char, token_buf);
|
||||
CloseHandle(hAccessToken);
|
||||
return nullptr;
|
||||
@ -821,10 +777,8 @@ static PSID get_user_sid(HANDLE hProcess) {
|
||||
PSID pSID = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
|
||||
|
||||
if (!CopySid(nbytes, pSID, token_buf->User.Sid)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetTokenInformation failure: lasterror = %d,"
|
||||
" rsize = %d\n", GetLastError(), rsize);
|
||||
}
|
||||
log_debug(perf)("GetTokenInformation failure: lasterror = %d, rsize = %d",
|
||||
GetLastError(), rsize);
|
||||
FREE_C_HEAP_ARRAY(char, token_buf);
|
||||
FREE_C_HEAP_ARRAY(char, pSID);
|
||||
CloseHandle(hAccessToken);
|
||||
@ -866,10 +820,8 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
|
||||
// retrieve any existing access control list.
|
||||
if (!GetSecurityDescriptorDacl(pSD, &exists, &oldACL, &isdefault)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetSecurityDescriptor failure: lasterror = %d \n",
|
||||
GetLastError());
|
||||
}
|
||||
log_debug(perf)("GetSecurityDescriptor failure: lasterror = %d",
|
||||
GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -886,10 +838,8 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
if (!GetAclInformation(oldACL, &aclinfo,
|
||||
sizeof(ACL_SIZE_INFORMATION),
|
||||
AclSizeInformation)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("GetAclInformation failure: lasterror = %d \n", GetLastError());
|
||||
return false;
|
||||
}
|
||||
log_debug(perf)("GetAclInformation failure: lasterror = %d", GetLastError());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
aclinfo.AceCount = 0; // assume null DACL
|
||||
@ -914,9 +864,7 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
newACL = (PACL) NEW_C_HEAP_ARRAY(char, newACLsize, mtInternal);
|
||||
|
||||
if (!InitializeAcl(newACL, newACLsize, ACL_REVISION)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("InitializeAcl failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
@ -927,9 +875,7 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
while (ace_index < aclinfo.AceCount) {
|
||||
LPVOID ace;
|
||||
if (!GetAce(oldACL, ace_index, &ace)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("InitializeAcl failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
@ -954,9 +900,7 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
if (matches == 0) {
|
||||
if (!AddAce(newACL, ACL_REVISION, MAXDWORD, ace,
|
||||
((PACE_HEADER)ace)->AceSize)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("AddAce failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("AddAce failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
@ -969,10 +913,8 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
for (int i = 0; i < ace_count; i++) {
|
||||
if (!AddAccessAllowedAce(newACL, ACL_REVISION,
|
||||
aces[i].mask, aces[i].pSid)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("AddAccessAllowedAce failure: lasterror = %d \n",
|
||||
GetLastError());
|
||||
}
|
||||
log_debug(perf)("AddAccessAllowedAce failure: lasterror = %d",
|
||||
GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
@ -985,17 +927,13 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
while (ace_index < aclinfo.AceCount) {
|
||||
LPVOID ace;
|
||||
if (!GetAce(oldACL, ace_index, &ace)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("InitializeAcl failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("InitializeAcl failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
if (!AddAce(newACL, ACL_REVISION, MAXDWORD, ace,
|
||||
((PACE_HEADER)ace)->AceSize)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("AddAce failure: lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("AddAce failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
@ -1005,39 +943,23 @@ static bool add_allow_aces(PSECURITY_DESCRIPTOR pSD,
|
||||
|
||||
// add the new ACL to the security descriptor.
|
||||
if (!SetSecurityDescriptorDacl(pSD, TRUE, newACL, FALSE)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("SetSecurityDescriptorDacl failure:"
|
||||
" lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("SetSecurityDescriptorDacl failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
|
||||
// if running on windows 2000 or later, set the automatic inheritance
|
||||
// control flags.
|
||||
SetSecurityDescriptorControlFnPtr _SetSecurityDescriptorControl;
|
||||
_SetSecurityDescriptorControl = (SetSecurityDescriptorControlFnPtr)
|
||||
GetProcAddress(GetModuleHandle(TEXT("advapi32.dll")),
|
||||
"SetSecurityDescriptorControl");
|
||||
|
||||
if (_SetSecurityDescriptorControl != nullptr) {
|
||||
// We do not want to further propagate inherited DACLs, so making them
|
||||
// protected prevents that.
|
||||
if (!_SetSecurityDescriptorControl(pSD, SE_DACL_PROTECTED,
|
||||
SE_DACL_PROTECTED)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("SetSecurityDescriptorControl failure:"
|
||||
" lasterror = %d \n", GetLastError());
|
||||
}
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
// We do not want to further propagate inherited DACLs, so making them
|
||||
// protected prevents that.
|
||||
if (!SetSecurityDescriptorControl(pSD, SE_DACL_PROTECTED, SE_DACL_PROTECTED)) {
|
||||
log_debug(perf)("SetSecurityDescriptorControl failure: lasterror = %d", GetLastError());
|
||||
FREE_C_HEAP_ARRAY(char, newACL);
|
||||
return false;
|
||||
}
|
||||
// Note, the security descriptor maintains a reference to the newACL, not
|
||||
// a copy of it. Therefore, the newACL is not freed here. It is freed when
|
||||
// the security descriptor containing its reference is freed.
|
||||
//
|
||||
return true;
|
||||
|
||||
// Note, the security descriptor maintains a reference to the newACL, not
|
||||
// a copy of it. Therefore, the newACL is not freed here. It is freed when
|
||||
// the security descriptor containing its reference is freed.
|
||||
return true;
|
||||
}
|
||||
|
||||
// method to create a security attributes structure, which contains a
|
||||
@ -1057,10 +979,7 @@ static LPSECURITY_ATTRIBUTES make_security_attr(ace_data_t aces[], int count) {
|
||||
|
||||
// initialize the security descriptor
|
||||
if (!InitializeSecurityDescriptor(pSD, SECURITY_DESCRIPTOR_REVISION)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("InitializeSecurityDescriptor failure: "
|
||||
"lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("InitializeSecurityDescriptor failure: lasterror = %d", GetLastError());
|
||||
free_security_desc(pSD);
|
||||
return nullptr;
|
||||
}
|
||||
@ -1113,11 +1032,7 @@ static LPSECURITY_ATTRIBUTES make_user_everybody_admin_security_attr(
|
||||
SECURITY_BUILTIN_DOMAIN_RID,
|
||||
DOMAIN_ALIAS_RID_ADMINS,
|
||||
0, 0, 0, 0, 0, 0, &administratorsSid)) {
|
||||
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("AllocateAndInitializeSid failure: "
|
||||
"lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("AllocateAndInitializeSid failure: lasterror = %d", GetLastError());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -1131,11 +1046,7 @@ static LPSECURITY_ATTRIBUTES make_user_everybody_admin_security_attr(
|
||||
|
||||
if (!AllocateAndInitializeSid( &SIDAuthEverybody, 1, SECURITY_WORLD_RID,
|
||||
0, 0, 0, 0, 0, 0, 0, &everybodySid)) {
|
||||
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("AllocateAndInitializeSid failure: "
|
||||
"lasterror = %d \n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("AllocateAndInitializeSid failure: lasterror = %d", GetLastError());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -1236,9 +1147,7 @@ static bool make_user_tmp_dir(const char* dirname) {
|
||||
//
|
||||
if (!is_directory_secure(dirname)) {
|
||||
// directory is not secure
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("%s directory is insecure\n", dirname);
|
||||
}
|
||||
log_debug(perf)("%s directory is insecure", dirname);
|
||||
free_security_attr(pDirSA);
|
||||
return false;
|
||||
}
|
||||
@ -1249,16 +1158,11 @@ static bool make_user_tmp_dir(const char* dirname) {
|
||||
// DACLs might fix the corrupted DACLs.
|
||||
SECURITY_INFORMATION secInfo = DACL_SECURITY_INFORMATION;
|
||||
if (!SetFileSecurity(dirname, secInfo, pDirSA->lpSecurityDescriptor)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
lasterror = GetLastError();
|
||||
warning("SetFileSecurity failed for %s directory. lasterror %d \n",
|
||||
dirname, lasterror);
|
||||
}
|
||||
lasterror = GetLastError();
|
||||
log_debug(perf)("SetFileSecurity failed for %s directory. lasterror = %d", dirname, lasterror);
|
||||
}
|
||||
} else {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("CreateDirectory failed: %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("CreateDirectory failed: %d", GetLastError());
|
||||
free_security_attr(pDirSA);
|
||||
return false;
|
||||
}
|
||||
@ -1325,9 +1229,7 @@ static HANDLE create_sharedmem_resources(const char* dirname, const char* filena
|
||||
|
||||
if (fh == INVALID_HANDLE_VALUE) {
|
||||
DWORD lasterror = GetLastError();
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not create file %s: %d\n", filename, lasterror);
|
||||
}
|
||||
log_debug(perf)("could not create file %s: %d", filename, lasterror);
|
||||
free_security_attr(lpSmoSA);
|
||||
return nullptr;
|
||||
}
|
||||
@ -1353,10 +1255,8 @@ static HANDLE create_sharedmem_resources(const char* dirname, const char* filena
|
||||
struct stat statbuf;
|
||||
int ret_code = ::stat(filename, &statbuf);
|
||||
if (ret_code == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Could not get status information from file %s: %s\n",
|
||||
filename, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("could not get status information from file %s: %s",
|
||||
filename, os::strerror(errno));
|
||||
CloseHandle(fmh);
|
||||
CloseHandle(fh);
|
||||
fh = nullptr;
|
||||
@ -1369,9 +1269,7 @@ static HANDLE create_sharedmem_resources(const char* dirname, const char* filena
|
||||
// call it when we observe the size as zero (0).
|
||||
if (statbuf.st_size == 0 && FlushFileBuffers(fh) != TRUE) {
|
||||
DWORD lasterror = GetLastError();
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("could not flush file %s: %d\n", filename, lasterror);
|
||||
}
|
||||
log_debug(perf)("could not flush file %s: %d", filename, lasterror);
|
||||
CloseHandle(fmh);
|
||||
CloseHandle(fh);
|
||||
fh = nullptr;
|
||||
@ -1402,10 +1300,8 @@ static HANDLE open_sharedmem_object(const char* objectname, DWORD ofm_access, TR
|
||||
|
||||
if (fmh == nullptr) {
|
||||
DWORD lasterror = GetLastError();
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("OpenFileMapping failed for shared memory object %s:"
|
||||
" lasterror = %d\n", objectname, lasterror);
|
||||
}
|
||||
log_debug(perf)("OpenFileMapping failed for shared memory object %s:"
|
||||
" lasterror = %d", objectname, lasterror);
|
||||
THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(),
|
||||
err_msg("Could not open PerfMemory, error %d", lasterror),
|
||||
INVALID_HANDLE_VALUE);
|
||||
@ -1485,9 +1381,7 @@ static char* mapping_create_shared(size_t size) {
|
||||
(DWORD)size); /* DWORD Number of bytes to map */
|
||||
|
||||
if (mapAddress == nullptr) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("MapViewOfFile failed, lasterror = %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("MapViewOfFile failed, lasterror = %d", GetLastError());
|
||||
CloseHandle(sharedmem_fileMapHandle);
|
||||
sharedmem_fileMapHandle = nullptr;
|
||||
return nullptr;
|
||||
@ -1551,20 +1445,14 @@ static size_t sharedmem_filesize(const char* filename, TRAPS) {
|
||||
// inconsistencies
|
||||
//
|
||||
if (::stat(filename, &statbuf) == OS_ERR) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("stat %s failed: %s\n", filename, os::strerror(errno));
|
||||
}
|
||||
log_debug(perf)("stat %s failed: %s", filename, os::strerror(errno));
|
||||
THROW_MSG_0(vmSymbols::java_io_IOException(),
|
||||
"Could not determine PerfMemory size");
|
||||
}
|
||||
|
||||
if ((statbuf.st_size == 0) || (statbuf.st_size % os::vm_page_size() != 0)) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("unexpected file size: size = %zu\n",
|
||||
statbuf.st_size);
|
||||
}
|
||||
THROW_MSG_0(vmSymbols::java_io_IOException(),
|
||||
"Invalid PerfMemory size");
|
||||
log_debug(perf)("unexpected file size: size = %zu", statbuf.st_size);
|
||||
THROW_MSG_0(vmSymbols::java_io_IOException(), "Invalid PerfMemory size");
|
||||
}
|
||||
|
||||
return statbuf.st_size;
|
||||
@ -1637,9 +1525,7 @@ static void open_file_mapping(int vmid, char** addrp, size_t* sizep, TRAPS) {
|
||||
size); /* DWORD Number of bytes to map */
|
||||
|
||||
if (mapAddress == nullptr) {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("MapViewOfFile failed, lasterror = %d\n", GetLastError());
|
||||
}
|
||||
log_debug(perf)("MapViewOfFile failed, lasterror = %d", GetLastError());
|
||||
CloseHandle(fmh);
|
||||
THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(),
|
||||
"Could not map PerfMemory");
|
||||
@ -1708,9 +1594,7 @@ void PerfMemory::create_memory_region(size_t size) {
|
||||
// creation of the shared memory region failed, attempt
|
||||
// to create a contiguous, non-shared memory region instead.
|
||||
//
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
warning("Reverting to non-shared PerfMemory region.\n");
|
||||
}
|
||||
log_debug(perf)("Reverting to non-shared PerfMemory region.");
|
||||
FLAG_SET_ERGO(PerfDisableSharedMem, true);
|
||||
_start = create_standard_memory(size);
|
||||
}
|
||||
|
||||
@ -86,9 +86,7 @@ char* os::non_memory_address_word() {
|
||||
|
||||
address os::Posix::ucontext_get_pc(const ucontext_t* uc) {
|
||||
if (DecodeErrorContext) {
|
||||
#if defined(IA32)
|
||||
return (address)uc->uc_mcontext.gregs[REG_EIP];
|
||||
#elif defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
return (address)uc->uc_mcontext.gregs[REG_RIP];
|
||||
#elif defined(ARM)
|
||||
return (address)uc->uc_mcontext.arm_pc;
|
||||
@ -117,9 +115,7 @@ void os::Posix::ucontext_set_pc(ucontext_t* uc, address pc) {
|
||||
|
||||
intptr_t* os::Linux::ucontext_get_sp(const ucontext_t* uc) {
|
||||
if (DecodeErrorContext) {
|
||||
#if defined(IA32)
|
||||
return (intptr_t*)uc->uc_mcontext.gregs[REG_UESP];
|
||||
#elif defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
return (intptr_t*)uc->uc_mcontext.gregs[REG_RSP];
|
||||
#elif defined(ARM)
|
||||
return (intptr_t*)uc->uc_mcontext.arm_sp;
|
||||
@ -144,9 +140,7 @@ intptr_t* os::Linux::ucontext_get_sp(const ucontext_t* uc) {
|
||||
|
||||
intptr_t* os::Linux::ucontext_get_fp(const ucontext_t* uc) {
|
||||
if (DecodeErrorContext) {
|
||||
#if defined(IA32)
|
||||
return (intptr_t*)uc->uc_mcontext.gregs[REG_EBP];
|
||||
#elif defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
return (intptr_t*)uc->uc_mcontext.gregs[REG_RBP];
|
||||
#elif defined(ARM)
|
||||
return (intptr_t*)uc->uc_mcontext.arm_fp;
|
||||
|
||||
@ -26,10 +26,17 @@
|
||||
#define OS_CPU_WINDOWS_AARCH64_OS_WINDOWS_AARCH64_INLINE_HPP
|
||||
|
||||
#include "runtime/os.hpp"
|
||||
#include "os_windows.hpp"
|
||||
|
||||
inline bool os::register_code_area(char *low, char *high) {
|
||||
// Using Vectored Exception Handling
|
||||
return true;
|
||||
}
|
||||
|
||||
#define HAVE_PLATFORM_PRINT_NATIVE_STACK 1
|
||||
inline bool os::platform_print_native_stack(outputStream* st, const void* context,
|
||||
char *buf, int buf_size, address& lastpc) {
|
||||
return os::win32::platform_print_native_stack(st, context, buf, buf_size, lastpc);
|
||||
}
|
||||
|
||||
#endif // OS_CPU_WINDOWS_AARCH64_OS_WINDOWS_AARCH64_INLINE_HPP
|
||||
|
||||
@ -51,6 +51,8 @@
|
||||
#include "utilities/vmError.hpp"
|
||||
#include "windbghelp.hpp"
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
|
||||
#undef REG_SP
|
||||
#undef REG_FP
|
||||
@ -197,98 +199,6 @@ bool handle_FLT_exception(struct _EXCEPTION_POINTERS* exceptionInfo) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PLATFORM_PRINT_NATIVE_STACK
|
||||
/*
|
||||
* Windows/x64 does not use stack frames the way expected by Java:
|
||||
* [1] in most cases, there is no frame pointer. All locals are addressed via RSP
|
||||
* [2] in rare cases, when alloca() is used, a frame pointer is used, but this may
|
||||
* not be RBP.
|
||||
* See http://msdn.microsoft.com/en-us/library/ew5tede7.aspx
|
||||
*
|
||||
* So it's not possible to print the native stack using the
|
||||
* while (...) {... fr = os::get_sender_for_C_frame(&fr); }
|
||||
* loop in vmError.cpp. We need to roll our own loop.
|
||||
*/
|
||||
bool os::win32::platform_print_native_stack(outputStream* st, const void* context,
|
||||
char *buf, int buf_size, address& lastpc)
|
||||
{
|
||||
CONTEXT ctx;
|
||||
if (context != nullptr) {
|
||||
memcpy(&ctx, context, sizeof(ctx));
|
||||
} else {
|
||||
RtlCaptureContext(&ctx);
|
||||
}
|
||||
|
||||
st->print_cr("Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)");
|
||||
|
||||
STACKFRAME stk;
|
||||
memset(&stk, 0, sizeof(stk));
|
||||
stk.AddrStack.Offset = ctx.Rsp;
|
||||
stk.AddrStack.Mode = AddrModeFlat;
|
||||
stk.AddrFrame.Offset = ctx.Rbp;
|
||||
stk.AddrFrame.Mode = AddrModeFlat;
|
||||
stk.AddrPC.Offset = ctx.Rip;
|
||||
stk.AddrPC.Mode = AddrModeFlat;
|
||||
|
||||
// Ensure we consider dynamically loaded dll's
|
||||
SymbolEngine::refreshModuleList();
|
||||
|
||||
int count = 0;
|
||||
address lastpc_internal = 0;
|
||||
while (count++ < StackPrintLimit) {
|
||||
intptr_t* sp = (intptr_t*)stk.AddrStack.Offset;
|
||||
intptr_t* fp = (intptr_t*)stk.AddrFrame.Offset; // NOT necessarily the same as ctx.Rbp!
|
||||
address pc = (address)stk.AddrPC.Offset;
|
||||
|
||||
if (pc != nullptr) {
|
||||
if (count == 2 && lastpc_internal == pc) {
|
||||
// Skip it -- StackWalk64() may return the same PC
|
||||
// (but different SP) on the first try.
|
||||
} else {
|
||||
// Don't try to create a frame(sp, fp, pc) -- on WinX64, stk.AddrFrame
|
||||
// may not contain what Java expects, and may cause the frame() constructor
|
||||
// to crash. Let's just print out the symbolic address.
|
||||
frame::print_C_frame(st, buf, buf_size, pc);
|
||||
// print source file and line, if available
|
||||
char buf[128];
|
||||
int line_no;
|
||||
if (SymbolEngine::get_source_info(pc, buf, sizeof(buf), &line_no)) {
|
||||
st->print(" (%s:%d)", buf, line_no);
|
||||
} else {
|
||||
st->print(" (no source info available)");
|
||||
}
|
||||
st->cr();
|
||||
}
|
||||
lastpc_internal = pc;
|
||||
}
|
||||
|
||||
PVOID p = WindowsDbgHelp::symFunctionTableAccess64(GetCurrentProcess(), stk.AddrPC.Offset);
|
||||
if (!p) {
|
||||
// StackWalk64() can't handle this PC. Calling StackWalk64 again may cause crash.
|
||||
lastpc = lastpc_internal;
|
||||
break;
|
||||
}
|
||||
|
||||
BOOL result = WindowsDbgHelp::stackWalk64(
|
||||
IMAGE_FILE_MACHINE_AMD64, // __in DWORD MachineType,
|
||||
GetCurrentProcess(), // __in HANDLE hProcess,
|
||||
GetCurrentThread(), // __in HANDLE hThread,
|
||||
&stk, // __inout LP STACKFRAME64 StackFrame,
|
||||
&ctx); // __inout PVOID ContextRecord,
|
||||
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (count > StackPrintLimit) {
|
||||
st->print_cr("...<more frames>...");
|
||||
}
|
||||
st->cr();
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif // HAVE_PLATFORM_PRINT_NATIVE_STACK
|
||||
|
||||
address os::fetch_frame_from_context(const void* ucVoid,
|
||||
intptr_t** ret_sp, intptr_t** ret_fp) {
|
||||
|
||||
@ -339,11 +249,15 @@ intptr_t* os::fetch_bcp_from_context(const void* ucVoid) {
|
||||
|
||||
// Returns the current stack pointer. Accurate value needed for
|
||||
// os::verify_stack_alignment().
|
||||
// The function is intentionally not inlined. This way, the transfer of control
|
||||
// into this method must be made with a call instruction. The MSVC
|
||||
// _AddressOfReturnAddress() intrinsic returns the address of the return PC
|
||||
// saved by that call instruction. Therefore, the stack pointer of the caller
|
||||
// just before the call instruction, is acquired by skipping over the return PC
|
||||
// slot in the stack.
|
||||
__declspec(noinline)
|
||||
address os::current_stack_pointer() {
|
||||
typedef address get_sp_func();
|
||||
get_sp_func* func = CAST_TO_FN_PTR(get_sp_func*,
|
||||
StubRoutines::x86::get_previous_sp_entry());
|
||||
return (*func)();
|
||||
return ((address)_AddressOfReturnAddress()) + sizeof(void*);
|
||||
}
|
||||
|
||||
bool os::win32::get_frame_at_stack_banging_point(JavaThread* thread,
|
||||
@ -500,11 +414,7 @@ void os::setup_fpu() {
|
||||
|
||||
#ifndef PRODUCT
|
||||
void os::verify_stack_alignment() {
|
||||
// The current_stack_pointer() calls generated get_previous_sp stub routine.
|
||||
// Only enable the assert after the routine becomes available.
|
||||
if (StubRoutines::initial_stubs_code() != nullptr) {
|
||||
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
|
||||
}
|
||||
assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -899,10 +899,12 @@ int ArchDesc::emit_msg(int quiet, int flag, int line, const char *fmt,
|
||||
|
||||
// Construct the name of the register mask.
|
||||
static const char *getRegMask(const char *reg_class_name) {
|
||||
if( reg_class_name == nullptr ) return "RegMask::Empty";
|
||||
if (reg_class_name == nullptr) {
|
||||
return "RegMask::EMPTY";
|
||||
}
|
||||
|
||||
if (strcmp(reg_class_name,"Universe")==0) {
|
||||
return "RegMask::Empty";
|
||||
return "RegMask::EMPTY";
|
||||
} else if (strcmp(reg_class_name,"stack_slots")==0) {
|
||||
return "(Compile::current()->FIRST_STACK_mask())";
|
||||
} else if (strcmp(reg_class_name, "dynamic")==0) {
|
||||
@ -920,7 +922,7 @@ static const char *getRegMask(const char *reg_class_name) {
|
||||
|
||||
// Convert a register class name to its register mask.
|
||||
const char *ArchDesc::reg_class_to_reg_mask(const char *rc_name) {
|
||||
const char *reg_mask = "RegMask::Empty";
|
||||
const char* reg_mask = "RegMask::EMPTY";
|
||||
|
||||
if( _register ) {
|
||||
RegClass *reg_class = _register->getRegClass(rc_name);
|
||||
@ -939,7 +941,7 @@ const char *ArchDesc::reg_class_to_reg_mask(const char *rc_name) {
|
||||
|
||||
// Obtain the name of the RegMask for an OperandForm
|
||||
const char *ArchDesc::reg_mask(OperandForm &opForm) {
|
||||
const char *regMask = "RegMask::Empty";
|
||||
const char* regMask = "RegMask::EMPTY";
|
||||
|
||||
// Check constraints on result's register class
|
||||
const char *result_class = opForm.constrained_reg_class();
|
||||
@ -968,9 +970,9 @@ const char *ArchDesc::reg_mask(InstructForm &inForm) {
|
||||
abort();
|
||||
}
|
||||
|
||||
// Instructions producing 'Universe' use RegMask::Empty
|
||||
// Instructions producing 'Universe' use RegMask::EMPTY
|
||||
if (strcmp(result,"Universe") == 0) {
|
||||
return "RegMask::Empty";
|
||||
return "RegMask::EMPTY";
|
||||
}
|
||||
|
||||
// Lookup this result operand and get its register class
|
||||
|
||||
@ -2422,7 +2422,7 @@ const char *OperandForm::constrained_reg_class() const {
|
||||
|
||||
// Return the register class associated with 'leaf'.
|
||||
const char *OperandForm::in_reg_class(uint leaf, FormDict &globals) {
|
||||
const char *reg_class = nullptr; // "RegMask::Empty";
|
||||
const char* reg_class = nullptr; // "RegMask::EMPTY";
|
||||
|
||||
if((_matrule == nullptr) || (_matrule->is_chain_rule(globals))) {
|
||||
reg_class = constrained_reg_class();
|
||||
|
||||
@ -2323,7 +2323,7 @@ private:
|
||||
if (strcmp(rep_var,"$Register") == 0) return "as_Register";
|
||||
if (strcmp(rep_var,"$KRegister") == 0) return "as_KRegister";
|
||||
if (strcmp(rep_var,"$FloatRegister") == 0) return "as_FloatRegister";
|
||||
#if defined(IA32) || defined(AMD64)
|
||||
#if defined(AMD64)
|
||||
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";
|
||||
#endif
|
||||
if (strcmp(rep_var,"$CondRegister") == 0) return "as_ConditionRegister";
|
||||
@ -2837,7 +2837,7 @@ static void defineIn_RegMask(FILE *fp, FormDict &globals, OperandForm &oper) {
|
||||
if (strcmp(first_reg_class, "stack_slots") == 0) {
|
||||
fprintf(fp," return &(Compile::current()->FIRST_STACK_mask());\n");
|
||||
} else if (strcmp(first_reg_class, "dynamic") == 0) {
|
||||
fprintf(fp," return &RegMask::Empty;\n");
|
||||
fprintf(fp, " return &RegMask::EMPTY;\n");
|
||||
} else {
|
||||
const char* first_reg_class_to_upper = toUpper(first_reg_class);
|
||||
fprintf(fp," return &%s_mask();\n", first_reg_class_to_upper);
|
||||
|
||||
@ -138,7 +138,7 @@ class ConversionStub: public CodeStub {
|
||||
public:
|
||||
ConversionStub(Bytecodes::Code bytecode, LIR_Opr input, LIR_Opr result)
|
||||
: _bytecode(bytecode), _input(input), _result(result) {
|
||||
NOT_IA32( ShouldNotReachHere(); ) // used only on x86-32
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
Bytecodes::Code bytecode() { return _bytecode; }
|
||||
|
||||
@ -527,16 +527,6 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) {
|
||||
safepoint_poll(op->in_opr(), op->info());
|
||||
break;
|
||||
|
||||
#ifdef IA32
|
||||
case lir_fxch:
|
||||
fxch(op->in_opr()->as_jint());
|
||||
break;
|
||||
|
||||
case lir_fld:
|
||||
fld(op->in_opr()->as_jint());
|
||||
break;
|
||||
#endif // IA32
|
||||
|
||||
case lir_branch:
|
||||
break;
|
||||
|
||||
@ -612,12 +602,6 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) {
|
||||
osr_entry();
|
||||
break;
|
||||
|
||||
#ifdef IA32
|
||||
case lir_fpop_raw:
|
||||
fpop();
|
||||
break;
|
||||
#endif // IA32
|
||||
|
||||
case lir_breakpoint:
|
||||
breakpoint();
|
||||
break;
|
||||
|
||||
@ -224,8 +224,46 @@ void AOTConstantPoolResolver::preresolve_field_and_method_cp_entries(JavaThread*
|
||||
bcs.next();
|
||||
Bytecodes::Code raw_bc = bcs.raw_code();
|
||||
switch (raw_bc) {
|
||||
case Bytecodes::_getstatic:
|
||||
case Bytecodes::_putstatic:
|
||||
maybe_resolve_fmi_ref(ik, m, raw_bc, bcs.get_index_u2(), preresolve_list, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
CLEAR_PENDING_EXCEPTION; // just ignore
|
||||
}
|
||||
break;
|
||||
case Bytecodes::_getfield:
|
||||
// no-fast bytecode
|
||||
case Bytecodes::_nofast_getfield:
|
||||
// fast bytecodes
|
||||
case Bytecodes::_fast_agetfield:
|
||||
case Bytecodes::_fast_bgetfield:
|
||||
case Bytecodes::_fast_cgetfield:
|
||||
case Bytecodes::_fast_dgetfield:
|
||||
case Bytecodes::_fast_fgetfield:
|
||||
case Bytecodes::_fast_igetfield:
|
||||
case Bytecodes::_fast_lgetfield:
|
||||
case Bytecodes::_fast_sgetfield:
|
||||
raw_bc = Bytecodes::_getfield;
|
||||
maybe_resolve_fmi_ref(ik, m, raw_bc, bcs.get_index_u2(), preresolve_list, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
CLEAR_PENDING_EXCEPTION; // just ignore
|
||||
}
|
||||
break;
|
||||
|
||||
case Bytecodes::_putfield:
|
||||
// no-fast bytecode
|
||||
case Bytecodes::_nofast_putfield:
|
||||
// fast bytecodes
|
||||
case Bytecodes::_fast_aputfield:
|
||||
case Bytecodes::_fast_bputfield:
|
||||
case Bytecodes::_fast_zputfield:
|
||||
case Bytecodes::_fast_cputfield:
|
||||
case Bytecodes::_fast_dputfield:
|
||||
case Bytecodes::_fast_fputfield:
|
||||
case Bytecodes::_fast_iputfield:
|
||||
case Bytecodes::_fast_lputfield:
|
||||
case Bytecodes::_fast_sputfield:
|
||||
raw_bc = Bytecodes::_putfield;
|
||||
maybe_resolve_fmi_ref(ik, m, raw_bc, bcs.get_index_u2(), preresolve_list, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
CLEAR_PENDING_EXCEPTION; // just ignore
|
||||
@ -235,6 +273,7 @@ void AOTConstantPoolResolver::preresolve_field_and_method_cp_entries(JavaThread*
|
||||
case Bytecodes::_invokespecial:
|
||||
case Bytecodes::_invokevirtual:
|
||||
case Bytecodes::_invokeinterface:
|
||||
case Bytecodes::_invokestatic:
|
||||
maybe_resolve_fmi_ref(ik, m, raw_bc, bcs.get_index_u2(), preresolve_list, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
CLEAR_PENDING_EXCEPTION; // just ignore
|
||||
@ -271,13 +310,31 @@ void AOTConstantPoolResolver::maybe_resolve_fmi_ref(InstanceKlass* ik, Method* m
|
||||
}
|
||||
|
||||
Klass* resolved_klass = cp->klass_ref_at(raw_index, bc, CHECK);
|
||||
const char* is_static = "";
|
||||
|
||||
switch (bc) {
|
||||
case Bytecodes::_getstatic:
|
||||
case Bytecodes::_putstatic:
|
||||
if (!VM_Version::supports_fast_class_init_checks()) {
|
||||
return; // Do not resolve since interpreter lacks fast clinit barriers support
|
||||
}
|
||||
InterpreterRuntime::resolve_get_put(bc, raw_index, mh, cp, false /*initialize_holder*/, CHECK);
|
||||
is_static = " *** static";
|
||||
break;
|
||||
|
||||
case Bytecodes::_getfield:
|
||||
case Bytecodes::_putfield:
|
||||
InterpreterRuntime::resolve_get_put(bc, raw_index, mh, cp, false /*initialize_holder*/, CHECK);
|
||||
break;
|
||||
|
||||
case Bytecodes::_invokestatic:
|
||||
if (!VM_Version::supports_fast_class_init_checks()) {
|
||||
return; // Do not resolve since interpreter lacks fast clinit barriers support
|
||||
}
|
||||
InterpreterRuntime::cds_resolve_invoke(bc, raw_index, cp, CHECK);
|
||||
is_static = " *** static";
|
||||
break;
|
||||
|
||||
case Bytecodes::_invokevirtual:
|
||||
case Bytecodes::_invokespecial:
|
||||
case Bytecodes::_invokeinterface:
|
||||
@ -297,11 +354,11 @@ void AOTConstantPoolResolver::maybe_resolve_fmi_ref(InstanceKlass* ik, Method* m
|
||||
bool resolved = cp->is_resolved(raw_index, bc);
|
||||
Symbol* name = cp->name_ref_at(raw_index, bc);
|
||||
Symbol* signature = cp->signature_ref_at(raw_index, bc);
|
||||
log_trace(aot, resolve)("%s %s [%3d] %s -> %s.%s:%s",
|
||||
log_trace(aot, resolve)("%s %s [%3d] %s -> %s.%s:%s%s",
|
||||
(resolved ? "Resolved" : "Failed to resolve"),
|
||||
Bytecodes::name(bc), cp_index, ik->external_name(),
|
||||
resolved_klass->external_name(),
|
||||
name->as_C_string(), signature->as_C_string());
|
||||
name->as_C_string(), signature->as_C_string(), is_static);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -42,6 +42,8 @@
|
||||
#include "oops/trainingData.hpp"
|
||||
#include "runtime/handles.inline.hpp"
|
||||
#include "runtime/java.hpp"
|
||||
#include "runtime/serviceThread.hpp"
|
||||
#include "utilities/growableArray.hpp"
|
||||
|
||||
void AOTLinkedClassBulkLoader::serialize(SerializeClosure* soc) {
|
||||
AOTLinkedClassTable::get()->serialize(soc);
|
||||
@ -53,6 +55,8 @@ void AOTLinkedClassBulkLoader::serialize(SerializeClosure* soc) {
|
||||
// step in restoring the JVM's state from the snapshot recorded in the AOT cache: other AOT optimizations
|
||||
// such as AOT compiled methods can make direct references to the preloaded classes, knowing that
|
||||
// these classes are guaranteed to be in at least the "loaded" state.
|
||||
//
|
||||
// Note: we can't link the classes yet because SharedRuntime is not yet ready to generate adapters.
|
||||
void AOTLinkedClassBulkLoader::preload_classes(JavaThread* current) {
|
||||
preload_classes_impl(current);
|
||||
if (current->has_pending_exception()) {
|
||||
@ -112,6 +116,44 @@ void AOTLinkedClassBulkLoader::preload_classes_in_table(Array<InstanceKlass*>* c
|
||||
}
|
||||
}
|
||||
|
||||
// Some cached heap objects may hold references to methods in aot-linked
|
||||
// classes (via MemberName). We need to make sure all classes are
|
||||
// linked before executing any bytecode.
|
||||
void AOTLinkedClassBulkLoader::link_classes(JavaThread* current) {
|
||||
link_classes_impl(current);
|
||||
if (current->has_pending_exception()) {
|
||||
exit_on_exception(current);
|
||||
}
|
||||
}
|
||||
|
||||
void AOTLinkedClassBulkLoader::link_classes_impl(TRAPS) {
|
||||
precond(CDSConfig::is_using_aot_linked_classes());
|
||||
|
||||
AOTLinkedClassTable* table = AOTLinkedClassTable::get();
|
||||
|
||||
link_classes_in_table(table->boot1(), CHECK);
|
||||
link_classes_in_table(table->boot2(), CHECK);
|
||||
link_classes_in_table(table->platform(), CHECK);
|
||||
link_classes_in_table(table->app(), CHECK);
|
||||
}
|
||||
|
||||
void AOTLinkedClassBulkLoader::link_classes_in_table(Array<InstanceKlass*>* classes, TRAPS) {
|
||||
if (classes != nullptr) {
|
||||
for (int i = 0; i < classes->length(); i++) {
|
||||
// NOTE: CDSConfig::is_preserving_verification_constraints() is required
|
||||
// when storing ik in the AOT cache. This means we don't have to verify
|
||||
// ik at all.
|
||||
//
|
||||
// Without is_preserving_verification_constraints(), ik->link_class() may cause
|
||||
// class loading, which may result in invocation of ClassLoader::loadClass() calls,
|
||||
// which CANNOT happen because we are not ready to execute any Java bytecodes yet
|
||||
// at this point.
|
||||
InstanceKlass* ik = classes->at(i);
|
||||
ik->link_class(CHECK);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void AOTLinkedClassBulkLoader::validate_module_of_preloaded_classes() {
|
||||
oop javabase_module_oop = ModuleEntryTable::javabase_moduleEntry()->module_oop();
|
||||
@ -173,25 +215,21 @@ void AOTLinkedClassBulkLoader::validate_module(Klass* k, const char* category_na
|
||||
}
|
||||
#endif
|
||||
|
||||
// Link all java.base classes in the AOTLinkedClassTable. Of those classes,
|
||||
// move the ones that have been AOT-initialized to the "initialized" state.
|
||||
void AOTLinkedClassBulkLoader::link_or_init_javabase_classes(JavaThread* current) {
|
||||
link_or_init_classes_for_loader(Handle(), AOTLinkedClassTable::get()->boot1(), current);
|
||||
void AOTLinkedClassBulkLoader::init_javabase_classes(JavaThread* current) {
|
||||
init_classes_for_loader(Handle(), AOTLinkedClassTable::get()->boot1(), current);
|
||||
if (current->has_pending_exception()) {
|
||||
exit_on_exception(current);
|
||||
}
|
||||
}
|
||||
|
||||
// Do the same thing as init_javabase_classes(), but for the classes that are not
|
||||
// in the java.base module.
|
||||
void AOTLinkedClassBulkLoader::link_or_init_non_javabase_classes(JavaThread* current) {
|
||||
link_or_init_non_javabase_classes_impl(current);
|
||||
void AOTLinkedClassBulkLoader::init_non_javabase_classes(JavaThread* current) {
|
||||
init_non_javabase_classes_impl(current);
|
||||
if (current->has_pending_exception()) {
|
||||
exit_on_exception(current);
|
||||
}
|
||||
}
|
||||
|
||||
void AOTLinkedClassBulkLoader::link_or_init_non_javabase_classes_impl(TRAPS) {
|
||||
void AOTLinkedClassBulkLoader::init_non_javabase_classes_impl(TRAPS) {
|
||||
assert(CDSConfig::is_using_aot_linked_classes(), "sanity");
|
||||
|
||||
DEBUG_ONLY(validate_module_of_preloaded_classes());
|
||||
@ -208,9 +246,9 @@ void AOTLinkedClassBulkLoader::link_or_init_non_javabase_classes_impl(TRAPS) {
|
||||
assert(h_system_loader() != nullptr, "must be");
|
||||
|
||||
AOTLinkedClassTable* table = AOTLinkedClassTable::get();
|
||||
link_or_init_classes_for_loader(Handle(), table->boot2(), CHECK);
|
||||
link_or_init_classes_for_loader(h_platform_loader, table->platform(), CHECK);
|
||||
link_or_init_classes_for_loader(h_system_loader, table->app(), CHECK);
|
||||
init_classes_for_loader(Handle(), table->boot2(), CHECK);
|
||||
init_classes_for_loader(h_platform_loader, table->platform(), CHECK);
|
||||
init_classes_for_loader(h_system_loader, table->app(), CHECK);
|
||||
|
||||
if (Universe::is_fully_initialized() && VerifyDuringStartup) {
|
||||
// Make sure we're still in a clean state.
|
||||
@ -242,8 +280,9 @@ void AOTLinkedClassBulkLoader::exit_on_exception(JavaThread* current) {
|
||||
log_error(aot)("Out of memory. Please run with a larger Java heap, current MaxHeapSize = "
|
||||
"%zuM", MaxHeapSize/M);
|
||||
} else {
|
||||
oop message = java_lang_Throwable::message(current->pending_exception());
|
||||
log_error(aot)("%s: %s", current->pending_exception()->klass()->external_name(),
|
||||
java_lang_String::as_utf8_string(java_lang_Throwable::message(current->pending_exception())));
|
||||
message == nullptr ? "(no message)" : java_lang_String::as_utf8_string(message));
|
||||
}
|
||||
vm_exit_during_initialization("Unexpected exception when loading aot-linked classes.");
|
||||
}
|
||||
@ -289,23 +328,13 @@ void AOTLinkedClassBulkLoader::initiate_loading(JavaThread* current, const char*
|
||||
// - classes that were AOT-initialized by AOTClassInitializer
|
||||
// - the classes of all objects that are reachable from the archived mirrors of
|
||||
// the AOT-linked classes for <class_loader>.
|
||||
void AOTLinkedClassBulkLoader::link_or_init_classes_for_loader(Handle class_loader, Array<InstanceKlass*>* classes, TRAPS) {
|
||||
void AOTLinkedClassBulkLoader::init_classes_for_loader(Handle class_loader, Array<InstanceKlass*>* classes, TRAPS) {
|
||||
if (classes != nullptr) {
|
||||
for (int i = 0; i < classes->length(); i++) {
|
||||
InstanceKlass* ik = classes->at(i);
|
||||
if (ik->class_loader_data() == nullptr) {
|
||||
// This class is not yet loaded. We will initialize it in a later phase.
|
||||
// For example, we have loaded only AOTLinkedClassCategory::BOOT1 classes
|
||||
// but ik is part of AOTLinkedClassCategory::BOOT2.
|
||||
continue;
|
||||
}
|
||||
assert(ik->class_loader_data() != nullptr, "must be");
|
||||
if (ik->has_aot_initialized_mirror()) {
|
||||
ik->initialize_with_aot_initialized_mirror(CHECK);
|
||||
} else {
|
||||
// Some cached heap objects may hold references to methods in aot-linked
|
||||
// classes (via MemberName). We need to make sure all classes are
|
||||
// linked to allow such MemberNames to be invoked.
|
||||
ik->link_class(CHECK);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,10 +52,11 @@ class AOTLinkedClassBulkLoader : AllStatic {
|
||||
static void preload_classes_impl(TRAPS);
|
||||
static void preload_classes_in_table(Array<InstanceKlass*>* classes,
|
||||
const char* category_name, Handle loader, TRAPS);
|
||||
static void initiate_loading(JavaThread* current, const char* category, Handle initiating_loader,
|
||||
Array<InstanceKlass*>* classes);
|
||||
static void link_or_init_non_javabase_classes_impl(TRAPS);
|
||||
static void link_or_init_classes_for_loader(Handle class_loader, Array<InstanceKlass*>* classes, TRAPS);
|
||||
static void initiate_loading(JavaThread* current, const char* category, Handle initiating_loader, Array<InstanceKlass*>* classes);
|
||||
static void link_classes_impl(TRAPS);
|
||||
static void link_classes_in_table(Array<InstanceKlass*>* classes, TRAPS);
|
||||
static void init_non_javabase_classes_impl(TRAPS);
|
||||
static void init_classes_for_loader(Handle class_loader, Array<InstanceKlass*>* classes, TRAPS);
|
||||
static void replay_training_at_init(Array<InstanceKlass*>* classes, TRAPS) NOT_CDS_RETURN;
|
||||
|
||||
#ifdef ASSERT
|
||||
@ -67,9 +68,10 @@ class AOTLinkedClassBulkLoader : AllStatic {
|
||||
|
||||
public:
|
||||
static void serialize(SerializeClosure* soc) NOT_CDS_RETURN;
|
||||
static void preload_classes(JavaThread* current);
|
||||
static void link_or_init_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void link_or_init_non_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void preload_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void link_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void init_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void init_non_javabase_classes(JavaThread* current) NOT_CDS_RETURN;
|
||||
static void exit_on_exception(JavaThread* current);
|
||||
|
||||
static void replay_training_at_init_for_preloaded_classes(TRAPS) NOT_CDS_RETURN;
|
||||
|
||||
@ -135,12 +135,14 @@ public:
|
||||
|
||||
virtual bool do_unique_ref(Ref* ref, bool read_only) {
|
||||
ArchivedObjInfo info;
|
||||
info._src_addr = ref->obj();
|
||||
info._buffered_addr = ref->obj();
|
||||
info._requested_addr = ref->obj();
|
||||
info._bytes = ref->size() * BytesPerWord;
|
||||
info._type = ref->msotype();
|
||||
_objs.append(info);
|
||||
if (AOTMetaspace::in_aot_cache(ref->obj())) {
|
||||
info._src_addr = ref->obj();
|
||||
info._buffered_addr = ref->obj();
|
||||
info._requested_addr = ref->obj();
|
||||
info._bytes = ref->size() * BytesPerWord;
|
||||
info._type = ref->msotype();
|
||||
_objs.append(info);
|
||||
}
|
||||
|
||||
return true; // keep iterating
|
||||
}
|
||||
|
||||
@ -943,8 +943,9 @@ bool CDSConfig::is_preserving_verification_constraints() {
|
||||
return AOTClassLinking;
|
||||
} else if (is_dumping_final_static_archive()) { // writing AOT cache
|
||||
return is_dumping_aot_linked_classes();
|
||||
} else if (is_dumping_classic_static_archive()) {
|
||||
return is_dumping_aot_linked_classes();
|
||||
} else {
|
||||
// For simplicity, we don't support this optimization with the old CDS workflow.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -28,6 +28,8 @@
|
||||
#include "classfile/classLoaderDataGraph.hpp"
|
||||
#include "classfile/javaClasses.inline.hpp"
|
||||
#include "classfile/moduleEntry.hpp"
|
||||
#include "classfile/symbolTable.hpp"
|
||||
#include "classfile/systemDictionary.hpp"
|
||||
#include "classfile/systemDictionaryShared.hpp"
|
||||
#include "classfile/vmSymbols.hpp"
|
||||
#include "logging/log.hpp"
|
||||
@ -153,9 +155,103 @@ CDSHeapVerifier::CDSHeapVerifier() : _archived_objs(0), _problems(0)
|
||||
|
||||
# undef ADD_EXCL
|
||||
|
||||
if (CDSConfig::is_initing_classes_at_dump_time()) {
|
||||
add_shared_secret_accessors();
|
||||
}
|
||||
ClassLoaderDataGraph::classes_do(this);
|
||||
}
|
||||
|
||||
// We allow only "stateless" accessors in the SharedSecrets class to be AOT-initialized, for example,
|
||||
// in the following pattern:
|
||||
//
|
||||
// class URL {
|
||||
// static {
|
||||
// SharedSecrets.setJavaNetURLAccess(
|
||||
// new JavaNetURLAccess() { ... });
|
||||
// }
|
||||
//
|
||||
// This initializes the field SharedSecrets::javaNetURLAccess, whose type (the anonymous class in the
|
||||
// above example) has no fields (static or otherwise) and is not a hidden class, so it cannot possibly
|
||||
// capture any transient state from the assembly phase that might become invalid in the production run.
|
||||
//
|
||||
class CDSHeapVerifier::SharedSecretsAccessorFinder : public FieldClosure {
|
||||
CDSHeapVerifier* _verifier;
|
||||
InstanceKlass* _ik;
|
||||
public:
|
||||
SharedSecretsAccessorFinder(CDSHeapVerifier* verifier, InstanceKlass* ik)
|
||||
: _verifier(verifier), _ik(ik) {}
|
||||
|
||||
void do_field(fieldDescriptor* fd) {
|
||||
if (fd->field_type() == T_OBJECT) {
|
||||
oop static_obj_field = _ik->java_mirror()->obj_field(fd->offset());
|
||||
if (static_obj_field != nullptr) {
|
||||
Klass* field_type = static_obj_field->klass();
|
||||
|
||||
if (!field_type->is_instance_klass()) {
|
||||
ResourceMark rm;
|
||||
log_error(aot, heap)("jdk.internal.access.SharedSecrets::%s must not be an array",
|
||||
fd->name()->as_C_string());
|
||||
AOTMetaspace::unrecoverable_writing_error();
|
||||
}
|
||||
|
||||
InstanceKlass* field_type_ik = InstanceKlass::cast(field_type);
|
||||
if (has_any_fields(field_type_ik) || field_type_ik->is_hidden()) {
|
||||
// If field_type_ik is a hidden class, the accessor is probably initialized using a
|
||||
// Lambda, which may contain transient states.
|
||||
ResourceMark rm;
|
||||
log_error(aot, heap)("jdk.internal.access.SharedSecrets::%s (%s) must be stateless",
|
||||
fd->name()->as_C_string(), field_type_ik->external_name());
|
||||
AOTMetaspace::unrecoverable_writing_error();
|
||||
}
|
||||
|
||||
_verifier->add_shared_secret_accessor(static_obj_field);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Does k (or any of its supertypes) have at least one (static or non-static) field?
|
||||
static bool has_any_fields(InstanceKlass* k) {
|
||||
if (k->static_field_size() != 0 || k->nonstatic_field_size() != 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (k->super() != nullptr && has_any_fields(k->super())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Array<InstanceKlass*>* interfaces = k->local_interfaces();
|
||||
int num_interfaces = interfaces->length();
|
||||
for (int index = 0; index < num_interfaces; index++) {
|
||||
if (has_any_fields(interfaces->at(index))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// This function is for allowing the following pattern in the core libraries:
|
||||
//
|
||||
// public class URLClassPath {
|
||||
// private static final JavaNetURLAccess JNUA = SharedSecrets.getJavaNetURLAccess();
|
||||
//
|
||||
// SharedSecrets::javaNetURLAccess has no state, so it can be safely AOT-initialized. During
|
||||
// the production run, even if URLClassPath.<clinit> is re-executed, it will get back the same
|
||||
// instance of javaNetURLAccess as it did during the assembly phase.
|
||||
//
|
||||
// Note: this will prevent complex accessors such as SharedSecrets::javaObjectInputFilterAccess
|
||||
// from being initialized during the AOT assembly phase.
|
||||
void CDSHeapVerifier::add_shared_secret_accessors() {
|
||||
TempNewSymbol klass_name = SymbolTable::new_symbol("jdk/internal/access/SharedSecrets");
|
||||
InstanceKlass* ik = SystemDictionary::find_instance_klass(Thread::current(), klass_name,
|
||||
Handle());
|
||||
assert(ik != nullptr, "must have been loaded");
|
||||
|
||||
SharedSecretsAccessorFinder finder(this, ik);
|
||||
ik->do_local_static_fields(&finder);
|
||||
}
|
||||
|
||||
CDSHeapVerifier::~CDSHeapVerifier() {
|
||||
if (_problems > 0) {
|
||||
log_error(aot, heap)("Scanned %d objects. Found %d case(s) where "
|
||||
@ -181,13 +277,12 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
if (fd->signature()->equals("Ljdk/internal/access/JavaLangAccess;")) {
|
||||
// A few classes have static fields that point to SharedSecrets.getJavaLangAccess().
|
||||
// This object carries no state and we can create a new one in the production run.
|
||||
return;
|
||||
}
|
||||
oop static_obj_field = _ik->java_mirror()->obj_field(fd->offset());
|
||||
if (static_obj_field != nullptr) {
|
||||
if (_verifier->is_shared_secret_accessor(static_obj_field)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Klass* field_type = static_obj_field->klass();
|
||||
if (_exclusions != nullptr) {
|
||||
for (const char** p = _exclusions; *p != nullptr; p++) {
|
||||
|
||||
@ -38,6 +38,7 @@ class Symbol;
|
||||
|
||||
class CDSHeapVerifier : public KlassClosure {
|
||||
class CheckStaticFields;
|
||||
class SharedSecretsAccessorFinder;
|
||||
class TraceFields;
|
||||
|
||||
int _archived_objs;
|
||||
@ -55,6 +56,7 @@ class CDSHeapVerifier : public KlassClosure {
|
||||
HeapShared::oop_hash> _table;
|
||||
|
||||
GrowableArray<const char**> _exclusions;
|
||||
GrowableArray<oop> _shared_secret_accessors;
|
||||
|
||||
void add_exclusion(const char** excl) {
|
||||
_exclusions.append(excl);
|
||||
@ -70,6 +72,22 @@ class CDSHeapVerifier : public KlassClosure {
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void add_shared_secret_accessors();
|
||||
|
||||
void add_shared_secret_accessor(oop obj) {
|
||||
_shared_secret_accessors.append(obj);
|
||||
}
|
||||
|
||||
bool is_shared_secret_accessor(oop obj) {
|
||||
for (int i = 0; i < _shared_secret_accessors.length(); i++) {
|
||||
if (_shared_secret_accessors.at(i) == obj) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int trace_to_root(outputStream* st, oop orig_obj, oop orig_field, HeapShared::CachedOopInfo* p);
|
||||
|
||||
CDSHeapVerifier();
|
||||
|
||||
@ -277,7 +277,9 @@ void ClassListWriter::write_resolved_constants_for(InstanceKlass* ik) {
|
||||
if (field_entries != nullptr) {
|
||||
for (int i = 0; i < field_entries->length(); i++) {
|
||||
ResolvedFieldEntry* rfe = field_entries->adr_at(i);
|
||||
if (rfe->is_resolved(Bytecodes::_getfield) ||
|
||||
if (rfe->is_resolved(Bytecodes::_getstatic) ||
|
||||
rfe->is_resolved(Bytecodes::_putstatic) ||
|
||||
rfe->is_resolved(Bytecodes::_getfield) ||
|
||||
rfe->is_resolved(Bytecodes::_putfield)) {
|
||||
list.at_put(rfe->constant_pool_index(), true);
|
||||
print = true;
|
||||
@ -292,6 +294,7 @@ void ClassListWriter::write_resolved_constants_for(InstanceKlass* ik) {
|
||||
if (rme->is_resolved(Bytecodes::_invokevirtual) ||
|
||||
rme->is_resolved(Bytecodes::_invokespecial) ||
|
||||
rme->is_resolved(Bytecodes::_invokeinterface) ||
|
||||
rme->is_resolved(Bytecodes::_invokestatic) ||
|
||||
rme->is_resolved(Bytecodes::_invokehandle)) {
|
||||
list.at_put(rme->constant_pool_index(), true);
|
||||
print = true;
|
||||
|
||||
@ -117,7 +117,6 @@ template <int N> static void get_header_version(char (&header_version) [N]) {
|
||||
|
||||
// Append the hash code as eight hex digits.
|
||||
os::snprintf_checked(&header_version[JVM_IDENT_MAX-9], 9, "%08x", hash);
|
||||
header_version[JVM_IDENT_MAX-1] = 0; // Null terminate.
|
||||
}
|
||||
|
||||
assert(header_version[JVM_IDENT_MAX-1] == 0, "must be");
|
||||
|
||||
@ -89,7 +89,9 @@ void FinalImageRecipes::record_recipes_for_constantpool() {
|
||||
if (field_entries != nullptr) {
|
||||
for (int i = 0; i < field_entries->length(); i++) {
|
||||
ResolvedFieldEntry* rfe = field_entries->adr_at(i);
|
||||
if (rfe->is_resolved(Bytecodes::_getfield) ||
|
||||
if (rfe->is_resolved(Bytecodes::_getstatic) ||
|
||||
rfe->is_resolved(Bytecodes::_putstatic) ||
|
||||
rfe->is_resolved(Bytecodes::_getfield) ||
|
||||
rfe->is_resolved(Bytecodes::_putfield)) {
|
||||
cp_indices.append(rfe->constant_pool_index());
|
||||
flags |= CP_RESOLVE_FIELD_AND_METHOD;
|
||||
@ -127,6 +129,14 @@ void FinalImageRecipes::record_recipes_for_constantpool() {
|
||||
}
|
||||
|
||||
if (cp_indices.length() > 0) {
|
||||
LogStreamHandle(Trace, aot, resolve) log;
|
||||
if (log.is_enabled()) {
|
||||
log.print("ConstantPool entries for %s to be pre-resolved:", k->external_name());
|
||||
for (int i = 0; i < cp_indices.length(); i++) {
|
||||
log.print(" %d", cp_indices.at(i));
|
||||
}
|
||||
log.print("\n");
|
||||
}
|
||||
tmp_cp_recipes.append(ArchiveUtils::archive_array(&cp_indices));
|
||||
} else {
|
||||
tmp_cp_recipes.append(nullptr);
|
||||
|
||||
@ -41,7 +41,7 @@ void RunTimeClassInfo::init(DumpTimeClassInfo& info) {
|
||||
_num_loader_constraints = info.num_loader_constraints();
|
||||
int i;
|
||||
|
||||
if (CDSConfig::is_preserving_verification_constraints() && CDSConfig::is_dumping_final_static_archive()) {
|
||||
if (CDSConfig::is_preserving_verification_constraints()) {
|
||||
// The production run doesn't need the verifier constraints, as we can guarantee that all classes checked by
|
||||
// the verifier during AOT training/assembly phases cannot be replaced in the production run.
|
||||
_num_verifier_constraints = 0;
|
||||
|
||||
@ -132,8 +132,16 @@ bool StackMapTable::match_stackmap(
|
||||
}
|
||||
|
||||
void StackMapTable::check_jump_target(
|
||||
StackMapFrame* frame, int32_t target, TRAPS) const {
|
||||
StackMapFrame* frame, int bci, int offset, TRAPS) const {
|
||||
ErrorContext ctx;
|
||||
// Jump targets must be within the method and the method size is limited. See JVMS 4.11
|
||||
int min_offset = -1 * max_method_code_size;
|
||||
if (offset < min_offset || offset > max_method_code_size) {
|
||||
frame->verifier()->verify_error(ErrorContext::bad_stackmap(bci, frame),
|
||||
"Illegal target of jump or branch (bci %d + offset %d)", bci, offset);
|
||||
return;
|
||||
}
|
||||
int target = bci + offset;
|
||||
bool match = match_stackmap(
|
||||
frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
|
||||
if (!match || (target < 0 || target >= _code_length)) {
|
||||
|
||||
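The hunk above changes check_jump_target() to take the branch's bci and raw offset instead of a precomputed target, and to range-check the offset against max_method_code_size before forming the target. The following standalone sketch illustrates why that ordering matters; it is not HotSpot code, and the constant and helper names are assumptions made only for illustration. Rejecting out-of-range offsets before the addition keeps bci + offset from overflowing.

// Sketch only: validate the branch offset before computing bci + offset.
#include <cstdio>

static const int max_method_code_size = 65535; // JVMS 4.11 limit on bytecode size (assumed constant name)

// Returns true if the branch target passes the cheap range checks;
// a real verifier would go on to match the stack map at 'target'.
static bool jump_target_plausible(int bci, int offset) {
  // Reject offsets that cannot land inside any legal method *before* the
  // addition, so the signed addition below cannot overflow.
  if (offset < -max_method_code_size || offset > max_method_code_size) {
    return false;
  }
  int target = bci + offset;                 // safe: both operands are bounded by 65535
  return target >= 0 && target <= max_method_code_size;
}

int main() {
  printf("%d\n", jump_target_plausible(10, 40));          // 1: ordinary forward branch
  printf("%d\n", jump_target_plausible(10, -20));         // 0: negative target
  printf("%d\n", jump_target_plausible(10, 0x7FFFFFF0));  // 0: rejected before it could overflow
  return 0;
}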
@ -67,7 +67,7 @@ class StackMapTable : public StackObj {
|
||||
|
||||
// Check jump instructions. Make sure there are no uninitialized
|
||||
// instances on backward branch.
|
||||
void check_jump_target(StackMapFrame* frame, int32_t target, TRAPS) const;
|
||||
void check_jump_target(StackMapFrame* frame, int bci, int offset, TRAPS) const;
|
||||
|
||||
// The following methods are only used inside this class.
|
||||
|
||||
|
||||
@ -855,6 +855,28 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
void SystemDictionaryShared::link_all_exclusion_check_candidates(InstanceKlass* ik) {
|
||||
bool need_to_link = false;
|
||||
{
|
||||
MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag);
|
||||
ExclusionCheckCandidates candidates(ik);
|
||||
|
||||
candidates.iterate_all([&] (InstanceKlass* k, DumpTimeClassInfo* info) {
|
||||
if (!k->is_linked()) {
|
||||
need_to_link = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
if (need_to_link) {
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
if (log_is_enabled(Info, aot, link)) {
|
||||
ResourceMark rm(THREAD);
|
||||
log_info(aot, link)("Link all loaded classes for %s", ik->external_name());
|
||||
}
|
||||
AOTMetaspace::link_all_loaded_classes(THREAD);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the class should be excluded. This can be called by
|
||||
// AOTConstantPoolResolver before or after we enter the CDS safepoint.
|
||||
// When called before the safepoint, we need to link the class so that
|
||||
@ -878,27 +900,19 @@ bool SystemDictionaryShared::should_be_excluded(Klass* k) {
|
||||
InstanceKlass* ik = InstanceKlass::cast(k);
|
||||
|
||||
if (!SafepointSynchronize::is_at_safepoint()) {
|
||||
if (!ik->is_linked()) {
|
||||
// should_be_excluded_impl() below doesn't link unlinked classes. We come
|
||||
// here only when we are trying to aot-link constant pool entries, so
|
||||
// we'd better link the class.
|
||||
JavaThread* THREAD = JavaThread::current();
|
||||
ik->link_class(THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
CLEAR_PENDING_EXCEPTION;
|
||||
return true; // linking failed -- let's exclude it
|
||||
{
|
||||
// fast path
|
||||
MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag);
|
||||
DumpTimeClassInfo* p = get_info_locked(ik);
|
||||
if (p->has_checked_exclusion()) {
|
||||
return p->is_excluded();
|
||||
}
|
||||
|
||||
// Also link any classes that were loaded for the verification of ik or its supertypes.
|
||||
// Otherwise we might miss the verification constraints of those classes.
|
||||
AOTMetaspace::link_all_loaded_classes(THREAD);
|
||||
}
|
||||
|
||||
link_all_exclusion_check_candidates(ik);
|
||||
|
||||
MutexLocker ml(DumpTimeTable_lock, Mutex::_no_safepoint_check_flag);
|
||||
DumpTimeClassInfo* p = get_info_locked(ik);
|
||||
if (p->is_excluded()) {
|
||||
return true;
|
||||
}
|
||||
return should_be_excluded_impl(ik, p);
|
||||
} else {
|
||||
// When called within the CDS safepoint, the correctness of this function
|
||||
@ -912,7 +926,7 @@ bool SystemDictionaryShared::should_be_excluded(Klass* k) {
|
||||
|
||||
// No need to check for is_linked() as all eligible classes should have
|
||||
// already been linked in AOTMetaspace::link_class_for_cds().
|
||||
// Can't take the lock as we are in safepoint.
|
||||
// Don't take DumpTimeTable_lock as we are in safepoint.
|
||||
DumpTimeClassInfo* p = _dumptime_table->get(ik);
|
||||
if (p->is_excluded()) {
|
||||
return true;
|
||||
|
||||
@ -175,6 +175,7 @@ private:
|
||||
static void write_dictionary(RunTimeSharedDictionary* dictionary,
|
||||
bool is_builtin);
|
||||
static bool is_jfr_event_class(InstanceKlass *k);
|
||||
static void link_all_exclusion_check_candidates(InstanceKlass* ik);
|
||||
static bool should_be_excluded_impl(InstanceKlass* k, DumpTimeClassInfo* info);
|
||||
|
||||
// exclusion checks
|
||||
|
||||
@ -781,7 +781,6 @@ void ClassVerifier::verify_method(const methodHandle& m, TRAPS) {
|
||||
|
||||
// Merge with the next instruction
|
||||
{
|
||||
int target;
|
||||
VerificationType type, type2;
|
||||
VerificationType atype;
|
||||
|
||||
@ -1606,9 +1605,8 @@ void ClassVerifier::verify_method(const methodHandle& m, TRAPS) {
|
||||
case Bytecodes::_ifle:
|
||||
current_frame.pop_stack(
|
||||
VerificationType::integer_type(), CHECK_VERIFY(this));
|
||||
target = bcs.dest();
|
||||
stackmap_table.check_jump_target(
|
||||
&current_frame, target, CHECK_VERIFY(this));
|
||||
&current_frame, bcs.bci(), bcs.get_offset_s2(), CHECK_VERIFY(this));
|
||||
no_control_flow = false; break;
|
||||
case Bytecodes::_if_acmpeq :
|
||||
case Bytecodes::_if_acmpne :
|
||||
@ -1619,19 +1617,16 @@ void ClassVerifier::verify_method(const methodHandle& m, TRAPS) {
|
||||
case Bytecodes::_ifnonnull :
|
||||
current_frame.pop_stack(
|
||||
VerificationType::reference_check(), CHECK_VERIFY(this));
|
||||
target = bcs.dest();
|
||||
stackmap_table.check_jump_target
|
||||
(&current_frame, target, CHECK_VERIFY(this));
|
||||
(&current_frame, bcs.bci(), bcs.get_offset_s2(), CHECK_VERIFY(this));
|
||||
no_control_flow = false; break;
|
||||
case Bytecodes::_goto :
|
||||
target = bcs.dest();
|
||||
stackmap_table.check_jump_target(
|
||||
&current_frame, target, CHECK_VERIFY(this));
|
||||
&current_frame, bcs.bci(), bcs.get_offset_s2(), CHECK_VERIFY(this));
|
||||
no_control_flow = true; break;
|
||||
case Bytecodes::_goto_w :
|
||||
target = bcs.dest_w();
|
||||
stackmap_table.check_jump_target(
|
||||
&current_frame, target, CHECK_VERIFY(this));
|
||||
&current_frame, bcs.bci(), bcs.get_offset_s4(), CHECK_VERIFY(this));
|
||||
no_control_flow = true; break;
|
||||
case Bytecodes::_tableswitch :
|
||||
case Bytecodes::_lookupswitch :
|
||||
@ -2280,15 +2275,14 @@ void ClassVerifier::verify_switch(
|
||||
}
|
||||
}
|
||||
}
|
||||
int target = bci + default_offset;
|
||||
stackmap_table->check_jump_target(current_frame, target, CHECK_VERIFY(this));
|
||||
stackmap_table->check_jump_target(current_frame, bci, default_offset, CHECK_VERIFY(this));
|
||||
for (int i = 0; i < keys; i++) {
|
||||
// Because check_jump_target() may safepoint, the bytecode could have
|
||||
// moved, which means 'aligned_bcp' is no good and needs to be recalculated.
|
||||
aligned_bcp = align_up(bcs->bcp() + 1, jintSize);
|
||||
target = bci + (jint)Bytes::get_Java_u4(aligned_bcp+(3+i*delta)*jintSize);
|
||||
int offset = (jint)Bytes::get_Java_u4(aligned_bcp+(3+i*delta)*jintSize);
|
||||
stackmap_table->check_jump_target(
|
||||
current_frame, target, CHECK_VERIFY(this));
|
||||
current_frame, bci, offset, CHECK_VERIFY(this));
|
||||
}
|
||||
NOT_PRODUCT(aligned_bcp = nullptr); // no longer valid at this point
|
||||
}
|
||||
|
||||
@ -467,8 +467,8 @@ class methodHandle;
|
||||
do_intrinsic(_Reference_clear0, java_lang_ref_Reference, clear0_name, void_method_signature, F_RN) \
|
||||
do_intrinsic(_PhantomReference_clear0, java_lang_ref_PhantomReference, clear0_name, void_method_signature, F_RN) \
|
||||
\
|
||||
/* support for com.sun.crypto.provider.AESCrypt and some of its callers */ \
|
||||
do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \
|
||||
/* support for com.sun.crypto.provider.AES_Crypt and some of its callers */ \
|
||||
do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AES_Crypt") \
|
||||
do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
|
||||
do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \
|
||||
do_name( encryptBlock_name, "implEncryptBlock") \
|
||||
|
||||
@ -910,6 +910,7 @@ void CodeBlob::dump_for_addr(address addr, outputStream* st, bool verbose) const
|
||||
nm->print_nmethod(true);
|
||||
} else {
|
||||
nm->print_on(st);
|
||||
nm->print_code_snippet(st, addr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
*/
|
||||
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "cds/cdsConfig.hpp"
|
||||
#include "code/codeCache.hpp"
|
||||
#include "code/compiledIC.hpp"
|
||||
#include "code/dependencies.hpp"
|
||||
@ -1147,7 +1148,7 @@ nmethod* nmethod::new_nmethod(const methodHandle& method,
|
||||
+ align_up(speculations_len , oopSize)
|
||||
#endif
|
||||
+ align_up(debug_info->data_size() , oopSize)
|
||||
+ align_up(ImmutableDataReferencesCounterSize, oopSize);
|
||||
+ ImmutableDataReferencesCounterSize;
|
||||
|
||||
// First, allocate space for immutable data in C heap.
|
||||
address immutable_data = nullptr;
|
||||
@ -1322,6 +1323,7 @@ nmethod::nmethod(
|
||||
#if INCLUDE_JVMCI
|
||||
_speculations_offset = 0;
|
||||
#endif
|
||||
_immutable_data_reference_counter_offset = 0;
|
||||
|
||||
code_buffer->copy_code_and_locs_to(this);
|
||||
code_buffer->copy_values_to(this);
|
||||
@ -1420,15 +1422,6 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
|
||||
_method = nm._method;
|
||||
_osr_link = nullptr;
|
||||
|
||||
// Increment number of references to immutable data to share it between nmethods
|
||||
_immutable_data_size = nm._immutable_data_size;
|
||||
if (_immutable_data_size > 0) {
|
||||
_immutable_data = nm._immutable_data;
|
||||
set_immutable_data_references_counter(get_immutable_data_references_counter() + 1);
|
||||
} else {
|
||||
_immutable_data = blob_end();
|
||||
}
|
||||
|
||||
_exception_cache = nullptr;
|
||||
_gc_data = nullptr;
|
||||
_oops_do_mark_nmethods = nullptr;
|
||||
@ -1444,6 +1437,7 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
|
||||
_entry_offset = nm._entry_offset;
|
||||
_verified_entry_offset = nm._verified_entry_offset;
|
||||
_entry_bci = nm._entry_bci;
|
||||
_immutable_data_size = nm._immutable_data_size;
|
||||
|
||||
_skipped_instructions_size = nm._skipped_instructions_size;
|
||||
_stub_offset = nm._stub_offset;
|
||||
@ -1462,6 +1456,15 @@ nmethod::nmethod(const nmethod &nm) : CodeBlob(nm._name, nm._kind, nm._size, nm.
|
||||
#if INCLUDE_JVMCI
|
||||
_speculations_offset = nm._speculations_offset;
|
||||
#endif
|
||||
_immutable_data_reference_counter_offset = nm._immutable_data_reference_counter_offset;
|
||||
|
||||
// Increment number of references to immutable data to share it between nmethods
|
||||
if (_immutable_data_size > 0) {
|
||||
_immutable_data = nm._immutable_data;
|
||||
set_immutable_data_references_counter(get_immutable_data_references_counter() + 1);
|
||||
} else {
|
||||
_immutable_data = blob_end();
|
||||
}
|
||||
|
||||
_orig_pc_offset = nm._orig_pc_offset;
|
||||
_compile_id = nm._compile_id;
|
||||
@ -1751,9 +1754,11 @@ nmethod::nmethod(
|
||||
|
||||
#if INCLUDE_JVMCI
|
||||
_speculations_offset = _scopes_data_offset + align_up(debug_info->data_size(), oopSize);
|
||||
DEBUG_ONLY( int immutable_data_end_offset = _speculations_offset + align_up(speculations_len, oopSize) + align_up(ImmutableDataReferencesCounterSize, oopSize); )
|
||||
_immutable_data_reference_counter_offset = _speculations_offset + align_up(speculations_len, oopSize);
|
||||
DEBUG_ONLY( int immutable_data_end_offset = _immutable_data_reference_counter_offset + ImmutableDataReferencesCounterSize; )
|
||||
#else
|
||||
DEBUG_ONLY( int immutable_data_end_offset = _scopes_data_offset + align_up(debug_info->data_size(), oopSize) + align_up(ImmutableDataReferencesCounterSize, oopSize); )
|
||||
_immutable_data_reference_counter_offset = _scopes_data_offset + align_up(debug_info->data_size(), oopSize);
|
||||
DEBUG_ONLY( int immutable_data_end_offset = _immutable_data_reference_counter_offset + ImmutableDataReferencesCounterSize; )
|
||||
#endif
|
||||
assert(immutable_data_end_offset <= immutable_data_size, "wrong read-only data size: %d > %d",
|
||||
immutable_data_end_offset, immutable_data_size);
|
||||
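The immutable-data hunks above record the reference counter at an explicit _immutable_data_reference_counter_offset inside the blob, and the nmethod copy constructor now increments that counter so two nmethods can share one immutable-data allocation. Below is a self-contained sketch of the general idea of an in-buffer reference count kept at a recorded offset; it is not HotSpot code, the type and function names are invented, and how HotSpot actually allocates and frees the buffer is not shown in this diff.

// Sketch only: a reference counter stored at a fixed offset inside a shared buffer.
#include <cassert>
#include <cstdlib>
#include <cstring>

struct SharedImmutableData {
  char* base;         // payload followed by an int reference counter
  int   counter_off;  // offset of the counter, recorded when the buffer is laid out

  int  refs() const    { int n; memcpy(&n, base + counter_off, sizeof(n)); return n; }
  void set_refs(int n) { memcpy(base + counter_off, &n, sizeof(n)); }
};

static SharedImmutableData allocate(int payload_bytes) {
  SharedImmutableData d;
  d.counter_off = payload_bytes;                           // counter sits right after the payload
  d.base = static_cast<char*>(malloc(payload_bytes + sizeof(int)));
  d.set_refs(1);                                           // the creating owner holds one reference
  return d;
}

static void share(SharedImmutableData& d)   { d.set_refs(d.refs() + 1); }  // copy-constructor path
static void release(SharedImmutableData& d) {
  int n = d.refs() - 1;
  if (n == 0) { free(d.base); d.base = nullptr; } else { d.set_refs(n); }
}

int main() {
  SharedImmutableData d = allocate(128);
  share(d);                      // a second owner now points at the same buffer
  assert(d.refs() == 2);
  release(d);
  release(d);                    // freed only when the last owner goes away
  return 0;
}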
@ -2500,11 +2505,48 @@ void nmethod::post_compiled_method(CompileTask* task) {
|
||||
maybe_print_nmethod(directive);
|
||||
}
|
||||
|
||||
#if INCLUDE_CDS
|
||||
static GrowableArrayCHeap<nmethod*, mtClassShared>* _delayed_compiled_method_load_events = nullptr;
|
||||
|
||||
void nmethod::add_delayed_compiled_method_load_event(nmethod* nm) {
|
||||
precond(CDSConfig::is_using_aot_linked_classes());
|
||||
precond(!ServiceThread::has_started());
|
||||
|
||||
// We are still in the single-threaded stage of VM bootstrap. No need to lock.
|
||||
if (_delayed_compiled_method_load_events == nullptr) {
|
||||
_delayed_compiled_method_load_events = new GrowableArrayCHeap<nmethod*, mtClassShared>();
|
||||
}
|
||||
_delayed_compiled_method_load_events->append(nm);
|
||||
}
|
||||
|
||||
void nmethod::post_delayed_compiled_method_load_events() {
|
||||
precond(ServiceThread::has_started());
|
||||
if (_delayed_compiled_method_load_events != nullptr) {
|
||||
for (int i = 0; i < _delayed_compiled_method_load_events->length(); i++) {
|
||||
nmethod* nm = _delayed_compiled_method_load_events->at(i);
|
||||
nm->post_compiled_method_load_event();
|
||||
}
|
||||
delete _delayed_compiled_method_load_events;
|
||||
_delayed_compiled_method_load_events = nullptr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// post_compiled_method_load_event
|
||||
// new method for install_code() path
|
||||
// Transfer information from compilation to jvmti
|
||||
void nmethod::post_compiled_method_load_event(JvmtiThreadState* state) {
|
||||
#if INCLUDE_CDS
|
||||
if (!ServiceThread::has_started()) {
|
||||
// With AOT-linked classes, we could compile wrappers for native methods before the
|
||||
// ServiceThread has been started, so we must delay the events to be posted later.
|
||||
assert(state == nullptr, "must be");
|
||||
add_delayed_compiled_method_load_event(this);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// This is a bad time for a safepoint. We don't want
|
||||
// this nmethod to get unloaded while we're queueing the event.
|
||||
NoSafepointVerifier nsv;
|
||||
@ -2596,7 +2638,7 @@ void nmethod::metadata_do(MetadataClosure* f) {
|
||||
// Main purpose is to reduce code cache pressure and get rid of
|
||||
// nmethods that don't seem to be all that relevant any longer.
|
||||
bool nmethod::is_cold() {
|
||||
if (!MethodFlushing || is_native_method() || is_not_installed()) {
|
||||
if (!MethodFlushing || is_not_installed()) {
|
||||
// No heuristic unloading at all
|
||||
return false;
|
||||
}
|
||||
@ -4266,6 +4308,46 @@ void nmethod::print_value_on_impl(outputStream* st) const {
|
||||
#endif
|
||||
}
|
||||
|
||||
void nmethod::print_code_snippet(outputStream* st, address addr) const {
|
||||
if (entry_point() <= addr && addr < code_end()) {
|
||||
// Pointing into the nmethod's code. Try to disassemble some instructions around addr.
|
||||
// Determine conservative start and end points.
|
||||
address start;
|
||||
if (frame_complete_offset() != CodeOffsets::frame_never_safe &&
|
||||
addr >= code_begin() + frame_complete_offset()) {
|
||||
start = code_begin() + frame_complete_offset();
|
||||
} else {
|
||||
start = (addr < verified_entry_point()) ? entry_point() : verified_entry_point();
|
||||
}
|
||||
address start_for_hex_dump = start; // We can choose a different starting point for hex dump, below.
|
||||
address end = code_end();
|
||||
|
||||
// Try using relocations to find closer instruction start and end points.
|
||||
// (Some platforms have variable length instructions and can only
|
||||
// disassemble correctly at instruction start addresses.)
|
||||
RelocIterator iter((nmethod*)this, start);
|
||||
while (iter.next() && iter.addr() < addr) { // find relocation before addr
|
||||
// Note: There's a relocation which doesn't point to an instruction start:
|
||||
// ZBarrierRelocationFormatStoreGoodAfterMov with ZGC on x86_64
|
||||
// We could detect and skip it, but hex dump is still usable when
|
||||
// disassembler produces garbage in such a very rare case.
|
||||
start = iter.addr();
|
||||
// We want at least 64 Bytes ahead in hex dump.
|
||||
if (iter.addr() <= (addr - 64)) start_for_hex_dump = iter.addr();
|
||||
}
|
||||
if (iter.has_current()) {
|
||||
if (iter.addr() == addr) iter.next(); // find relocation after addr
|
||||
if (iter.has_current()) end = iter.addr();
|
||||
}
|
||||
|
||||
// Always print hex. Disassembler may still have problems when hitting an incorrect instruction start.
|
||||
os::print_hex_dump(st, start_for_hex_dump, end, 1, /* print_ascii=*/false);
|
||||
if (!Disassembler::is_abstract()) {
|
||||
Disassembler::decode(start, end, st);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
|
||||
void nmethod::print_calls(outputStream* st) {
|
||||
|
||||
@ -250,6 +250,7 @@ class nmethod : public CodeBlob {
|
||||
#if INCLUDE_JVMCI
|
||||
int _speculations_offset;
|
||||
#endif
|
||||
int _immutable_data_reference_counter_offset;
|
||||
|
||||
// location in frame (offset for sp) that deopt can store the original
|
||||
// pc during a deopt.
|
||||
@ -646,12 +647,11 @@ public:
|
||||
#if INCLUDE_JVMCI
|
||||
address scopes_data_end () const { return _immutable_data + _speculations_offset ; }
|
||||
address speculations_begin () const { return _immutable_data + _speculations_offset ; }
|
||||
address speculations_end () const { return immutable_data_end() - ImmutableDataReferencesCounterSize ; }
|
||||
address speculations_end () const { return _immutable_data + _immutable_data_reference_counter_offset ; }
|
||||
#else
|
||||
address scopes_data_end () const { return immutable_data_end() - ImmutableDataReferencesCounterSize ; }
|
||||
address scopes_data_end () const { return _immutable_data + _immutable_data_reference_counter_offset ; }
|
||||
#endif
|
||||
|
||||
address immutable_data_references_counter_begin () const { return immutable_data_end() - ImmutableDataReferencesCounterSize ; }
|
||||
address immutable_data_references_counter_begin () const { return _immutable_data + _immutable_data_reference_counter_offset ; }
|
||||
|
||||
// Sizes
|
||||
int immutable_data_size() const { return _immutable_data_size; }
|
||||
@ -965,6 +965,8 @@ public:
|
||||
inline int get_immutable_data_references_counter() { return *((int*)immutable_data_references_counter_begin()); }
|
||||
inline void set_immutable_data_references_counter(int count) { *((int*)immutable_data_references_counter_begin()) = count; }
|
||||
|
||||
static void add_delayed_compiled_method_load_event(nmethod* nm) NOT_CDS_RETURN;
|
||||
|
||||
public:
|
||||
// ScopeDesc retrieval operation
|
||||
PcDesc* pc_desc_at(address pc) { return find_pc_desc(pc, false); }
|
||||
@ -999,10 +1001,14 @@ public:
|
||||
// Avoid hiding of parent's 'decode(outputStream*)' method.
|
||||
void decode(outputStream* st) const { decode2(st); } // just delegate here.
|
||||
|
||||
// AOT cache support
|
||||
static void post_delayed_compiled_method_load_events() NOT_CDS_RETURN;
|
||||
|
||||
// printing support
|
||||
void print_on_impl(outputStream* st) const;
|
||||
void print_code();
|
||||
void print_value_on_impl(outputStream* st) const;
|
||||
void print_code_snippet(outputStream* st, address addr) const;
|
||||
|
||||
#if defined(SUPPORT_DATA_STRUCTS)
|
||||
// print output in opt build for disassembler library
|
||||
|
||||
@ -423,13 +423,16 @@ void CompilationPolicy::print_counters_on(outputStream* st, const char* prefix,
|
||||
st->print(" %smax levels=%d,%d", prefix, m->highest_comp_level(), m->highest_osr_comp_level());
|
||||
}
|
||||
|
||||
void CompilationPolicy::print_training_data_on(outputStream* st, const char* prefix, Method* method) {
|
||||
void CompilationPolicy::print_training_data_on(outputStream* st, const char* prefix, Method* method, CompLevel cur_level) {
|
||||
methodHandle m(Thread::current(), method);
|
||||
st->print(" %smtd: ", prefix);
|
||||
MethodTrainingData* mtd = MethodTrainingData::find(m);
|
||||
if (mtd == nullptr) {
|
||||
st->print("null");
|
||||
} else {
|
||||
if (should_delay_standard_transition(m, cur_level, mtd)) {
|
||||
st->print("delayed, ");
|
||||
}
|
||||
MethodData* md = mtd->final_profile();
|
||||
st->print("mdo=");
|
||||
if (md == nullptr) {
|
||||
@ -536,9 +539,9 @@ void CompilationPolicy::print_event_on(outputStream *st, EventType type, Method*
|
||||
st->print("in-queue");
|
||||
} else st->print("idle");
|
||||
|
||||
print_training_data_on(st, "", m);
|
||||
print_training_data_on(st, "", m, level);
|
||||
if (inlinee_event) {
|
||||
print_training_data_on(st, "inlinee ", im);
|
||||
print_training_data_on(st, "inlinee ", im, level);
|
||||
}
|
||||
}
|
||||
st->print_cr("]");
|
||||
@ -1153,7 +1156,7 @@ CompLevel CompilationPolicy::trained_transition_from_none(const methodHandle& me
|
||||
// Now handle the case of level 4.
|
||||
assert(highest_training_level == CompLevel_full_optimization, "Unexpected compilation level: %d", highest_training_level);
|
||||
if (!training_has_profile) {
|
||||
// The method was a part of a level 4 compile, but don't have a stored profile,
|
||||
// The method was a part of a level 4 compile, but doesn't have a stored profile,
|
||||
// we need to profile it.
|
||||
return CompLevel_full_profile;
|
||||
}
|
||||
@ -1308,33 +1311,53 @@ CompLevel CompilationPolicy::common(const methodHandle& method, CompLevel cur_le
|
||||
if (mtd == nullptr) {
|
||||
// We haven't seen compilations of this method in training. It's either very cold or the behavior changed.
|
||||
// Feed it to the standard TF with no profiling delay.
|
||||
next_level = standard_transition<Predicate>(method, cur_level, false /*delay_profiling*/, disable_feedback);
|
||||
next_level = standard_transition<Predicate>(method, cur_level, disable_feedback);
|
||||
} else {
|
||||
next_level = trained_transition(method, cur_level, mtd, THREAD);
|
||||
if (cur_level == next_level) {
|
||||
if (cur_level == next_level && !should_delay_standard_transition(method, cur_level, mtd)) {
|
||||
// trained_transtion() is going to return the same level if no startup/warmup optimizations apply.
|
||||
// In order to catch possible pathologies due to behavior change we feed the event to the regular
|
||||
// TF but with profiling delay.
|
||||
next_level = standard_transition<Predicate>(method, cur_level, true /*delay_profiling*/, disable_feedback);
|
||||
next_level = standard_transition<Predicate>(method, cur_level, disable_feedback);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
next_level = standard_transition<Predicate>(method, cur_level, false /*delay_profiling*/, disable_feedback);
|
||||
next_level = standard_transition<Predicate>(method, cur_level, disable_feedback);
|
||||
}
|
||||
return (next_level != cur_level) ? limit_level(next_level) : next_level;
|
||||
}
|
||||
|
||||
bool CompilationPolicy::should_delay_standard_transition(const methodHandle& method, CompLevel cur_level, MethodTrainingData* mtd) {
|
||||
precond(mtd != nullptr);
|
||||
CompLevel highest_training_level = static_cast<CompLevel>(mtd->highest_top_level());
|
||||
if (highest_training_level != CompLevel_full_optimization && cur_level == CompLevel_limited_profile) {
|
||||
// This is a lukewarm method - it hasn't been compiled with C2 during the training run and is currently
|
||||
// running at level 2. Delay any further state changes until its counters exceed the training run counts.
|
||||
MethodCounters* mc = method->method_counters();
|
||||
if (mc == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (mc->invocation_counter()->carry() || mc->backedge_counter()->carry()) {
|
||||
return false;
|
||||
}
|
||||
if (static_cast<int>(mc->invocation_counter()->count()) <= mtd->invocation_count() &&
|
||||
static_cast<int>(mc->backedge_counter()->count()) <= mtd->backedge_count()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename Predicate>
|
||||
CompLevel CompilationPolicy::standard_transition(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback) {
|
||||
CompLevel CompilationPolicy::standard_transition(const methodHandle& method, CompLevel cur_level, bool disable_feedback) {
|
||||
CompLevel next_level = cur_level;
|
||||
switch(cur_level) {
|
||||
default: break;
|
||||
case CompLevel_none:
|
||||
next_level = transition_from_none<Predicate>(method, cur_level, delay_profiling, disable_feedback);
|
||||
next_level = transition_from_none<Predicate>(method, cur_level, disable_feedback);
|
||||
break;
|
||||
case CompLevel_limited_profile:
|
||||
next_level = transition_from_limited_profile<Predicate>(method, cur_level, delay_profiling, disable_feedback);
|
||||
next_level = transition_from_limited_profile<Predicate>(method, cur_level, disable_feedback);
|
||||
break;
|
||||
case CompLevel_full_profile:
|
||||
next_level = transition_from_full_profile<Predicate>(method, cur_level);
|
||||
@ -1343,16 +1366,8 @@ CompLevel CompilationPolicy::standard_transition(const methodHandle& method, Com
|
||||
return next_level;
|
||||
}
|
||||
|
||||
template<typename Predicate> static inline bool apply_predicate(const methodHandle& method, CompLevel cur_level, int i, int b, bool delay_profiling, double delay_profiling_scale) {
|
||||
if (delay_profiling) {
|
||||
return Predicate::apply_scaled(method, cur_level, i, b, delay_profiling_scale);
|
||||
} else {
|
||||
return Predicate::apply(method, cur_level, i, b);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Predicate>
|
||||
CompLevel CompilationPolicy::transition_from_none(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback) {
|
||||
CompLevel CompilationPolicy::transition_from_none(const methodHandle& method, CompLevel cur_level, bool disable_feedback) {
|
||||
precond(cur_level == CompLevel_none);
|
||||
CompLevel next_level = cur_level;
|
||||
int i = method->invocation_count();
|
||||
@ -1360,7 +1375,7 @@ CompLevel CompilationPolicy::transition_from_none(const methodHandle& method, Co
|
||||
// If we were at full profile level, would we switch to full opt?
|
||||
if (transition_from_full_profile<Predicate>(method, CompLevel_full_profile) == CompLevel_full_optimization) {
|
||||
next_level = CompLevel_full_optimization;
|
||||
} else if (!CompilationModeFlag::disable_intermediate() && apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier0ProfileDelayFactor)) {
|
||||
} else if (!CompilationModeFlag::disable_intermediate() && Predicate::apply(method, cur_level, i, b)) {
|
||||
// C1-generated fully profiled code is about 30% slower than the limited profile
|
||||
// code that has only invocation and backedge counters. The observation is that
|
||||
// if C2 queue is large enough we can spend too much time in the fully profiled code
|
||||
@ -1368,7 +1383,7 @@ CompLevel CompilationPolicy::transition_from_none(const methodHandle& method, Co
|
||||
// we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
|
||||
// we choose to compile a limited profiled version and then recompile with full profiling
|
||||
// when the load on C2 goes down.
|
||||
if (delay_profiling || (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) > Tier3DelayOn * compiler_count(CompLevel_full_optimization))) {
|
||||
if (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) > Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
|
||||
next_level = CompLevel_limited_profile;
|
||||
} else {
|
||||
next_level = CompLevel_full_profile;
|
||||
@ -1397,7 +1412,7 @@ CompLevel CompilationPolicy::transition_from_full_profile(const methodHandle& me
|
||||
}
|
||||
|
||||
template<typename Predicate>
|
||||
CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback) {
|
||||
CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle& method, CompLevel cur_level, bool disable_feedback) {
|
||||
precond(cur_level == CompLevel_limited_profile);
|
||||
CompLevel next_level = cur_level;
|
||||
int i = method->invocation_count();
|
||||
@ -1407,7 +1422,7 @@ CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle&
|
||||
if (mdo->would_profile()) {
|
||||
if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
|
||||
Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
|
||||
apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier2ProfileDelayFactor))) {
|
||||
Predicate::apply(method, cur_level, i, b))) {
|
||||
next_level = CompLevel_full_profile;
|
||||
}
|
||||
} else {
|
||||
@ -1417,7 +1432,7 @@ CompLevel CompilationPolicy::transition_from_limited_profile(const methodHandle&
|
||||
// If there is no MDO we need to profile
|
||||
if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
|
||||
Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
|
||||
apply_predicate<Predicate>(method, cur_level, i, b, delay_profiling, Tier2ProfileDelayFactor))) {
|
||||
Predicate::apply(method, cur_level, i, b))) {
|
||||
next_level = CompLevel_full_profile;
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,14 +263,15 @@ class CompilationPolicy : AllStatic {
static CompLevel common(const methodHandle& method, CompLevel cur_level, JavaThread* THREAD, bool disable_feedback = false);

template<typename Predicate>
static CompLevel transition_from_none(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback);
static CompLevel transition_from_none(const methodHandle& method, CompLevel cur_level, bool disable_feedback);
template<typename Predicate>
static CompLevel transition_from_limited_profile(const methodHandle& method, CompLevel cur_level, bool delay_profiling, bool disable_feedback);
static CompLevel transition_from_limited_profile(const methodHandle& method, CompLevel cur_level, bool disable_feedback);
template<typename Predicate>
static CompLevel transition_from_full_profile(const methodHandle& method, CompLevel cur_level);
template<typename Predicate>
static CompLevel standard_transition(const methodHandle& method, CompLevel cur_level, bool delayprof, bool disable_feedback);
static CompLevel standard_transition(const methodHandle& method, CompLevel cur_level, bool disable_feedback);

static bool should_delay_standard_transition(const methodHandle& method, CompLevel cur_level, MethodTrainingData* mtd);
static CompLevel trained_transition_from_none(const methodHandle& method, CompLevel cur_level, MethodTrainingData* mtd, JavaThread* THREAD);
static CompLevel trained_transition_from_limited_profile(const methodHandle& method, CompLevel cur_level, MethodTrainingData* mtd, JavaThread* THREAD);
static CompLevel trained_transition_from_full_profile(const methodHandle& method, CompLevel cur_level, MethodTrainingData* mtd, JavaThread* THREAD);
@ -284,7 +285,7 @@ class CompilationPolicy : AllStatic {
// level.
static CompLevel loop_event(const methodHandle& method, CompLevel cur_level, JavaThread* THREAD);
static void print_counters_on(outputStream* st, const char* prefix, Method* m);
static void print_training_data_on(outputStream* st, const char* prefix, Method* method);
static void print_training_data_on(outputStream* st, const char* prefix, Method* method, CompLevel cur_level);
// Has a method been long around?
// We don't remove old methods from the compile queue even if they have
// very low activity (see select_task()).

@ -215,11 +215,6 @@ void CompilerConfig::set_client_emulation_mode_flags() {
if (FLAG_IS_DEFAULT(CodeCacheExpansionSize)) {
FLAG_SET_ERGO(CodeCacheExpansionSize, 32*K);
}
if (FLAG_IS_DEFAULT(MaxRAM)) {
// Do not use FLAG_SET_ERGO to update MaxRAM, as this will impact
// heap setting done based on available phys_mem (see Arguments::set_heap_size).
FLAG_SET_DEFAULT(MaxRAM, 1ULL*G);
}
if (FLAG_IS_DEFAULT(CICompilerCount)) {
FLAG_SET_ERGO(CICompilerCount, 1);
}
@ -553,21 +548,36 @@ bool CompilerConfig::check_args_consistency(bool status) {
return status;
}

void CompilerConfig::ergo_initialize() {
bool CompilerConfig::should_set_client_emulation_mode_flags() {
#if !COMPILER1_OR_COMPILER2
return;
return false;
#endif

if (has_c1()) {
if (!is_compilation_mode_selected()) {
if (NeverActAsServerClassMachine) {
set_client_emulation_mode_flags();
return true;
}
} else if (!has_c2() && !is_jvmci_compiler()) {
set_client_emulation_mode_flags();
return true;
}
}

return false;
}

void CompilerConfig::ergo_initialize() {
#if !COMPILER1_OR_COMPILER2
return;
#endif

// This property is also checked when selecting the heap size. Since client
// emulation mode influences Java heap memory usage, part of the logic must
// occur before choosing the heap size.
if (should_set_client_emulation_mode_flags()) {
set_client_emulation_mode_flags();
}

set_legacy_emulation_flags();
set_compilation_policy_flags();


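The refactoring above splits the client-emulation decision (should_set_client_emulation_mode_flags) from applying the flags, so that heap sizing can consult the same predicate before the compiler ergonomics run. A minimal sketch of that separation, with simplified stand-in names rather than the real CompilerConfig API, might look like this:

// Illustrative sketch only: separates "should we emulate client mode?" from
// "apply the client-mode flags" so other ergonomics can ask the question first.
#include <cstdio>

struct Config {
  bool has_c1 = true;
  bool has_c2 = true;
  bool jvmci_compiler = false;
  bool compilation_mode_selected = false;
  bool never_act_as_server_class_machine = false;
};

static bool should_set_client_emulation_mode_flags(const Config& c) {
  if (!c.has_c1) return false;
  if (!c.compilation_mode_selected) {
    return c.never_act_as_server_class_machine;
  }
  return !c.has_c2 && !c.jvmci_compiler;   // C1-only configuration
}

static void set_client_emulation_mode_flags() {
  std::puts("applying client emulation flags (small code cache, one compiler thread, ...)");
}

static void ergo_initialize(const Config& c) {
  // The same predicate can also be consulted by heap sizing, so the decision
  // must not depend on anything computed later in ergonomics.
  if (should_set_client_emulation_mode_flags(c)) {
    set_client_emulation_mode_flags();
  }
}

int main() {
  Config c;
  c.has_c2 = false;                 // C1-only build
  c.compilation_mode_selected = true;
  ergo_initialize(c);               // prints the message
  return 0;
}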
@ -151,6 +151,8 @@ public:

inline static CompilerType compiler_type();

static bool should_set_client_emulation_mode_flags();

private:
static bool is_compilation_mode_selected();
static void set_compilation_policy_flags();

@ -271,13 +271,6 @@
"Maximum rate sampling interval (in milliseconds)") \
range(1, max_intx) \
\
product(double, Tier0ProfileDelayFactor, 100.0, DIAGNOSTIC, \
"Delay profiling/compiling of methods that were " \
"observed to be lukewarm") \
\
product(double, Tier2ProfileDelayFactor, 250.0, DIAGNOSTIC, \
"Delay profiling of methods that were observed to be lukewarm") \
\
product(bool, SkipTier2IfPossible, false, DIAGNOSTIC, \
"Compile at tier 4 instead of tier 2 in training replay " \
"mode if posssible") \

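Tier0ProfileDelayFactor and Tier2ProfileDelayFactor control how long a lukewarm method may stay unprofiled. As an assumption about how such a factor could enter a threshold predicate (the real HotSpot predicate machinery is more involved than this), a toy version looks like the following:

// Illustrative sketch only: a delay factor scaling an invocation threshold.
// The constant and the predicate shape are assumptions, not HotSpot code.
#include <cstdio>

static const double Tier0ProfileDelayFactor = 100.0;

static bool call_predicate(int invocation_count, double threshold, bool delay_profiling) {
  double scale = delay_profiling ? Tier0ProfileDelayFactor : 1.0;
  // With the delay factor applied, a method needs many more invocations
  // before it is considered for (full) profiling.
  return invocation_count >= threshold * scale;
}

int main() {
  std::printf("%d\n", call_predicate(5000, 200.0, false)); // 1: profile now
  std::printf("%d\n", call_predicate(5000, 200.0, true));  // 0: delayed (needs 20000)
  return 0;
}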
@ -72,12 +72,10 @@ define_pd_global(size_t, CodeCacheMinBlockLength, 1);
define_pd_global(size_t, CodeCacheMinimumUseSpace, 200*K);
#ifndef ZERO
define_pd_global(bool, NeverActAsServerClassMachine, true);
define_pd_global(uint64_t,MaxRAM, 1ULL*G);
#else
// Zero runs without compilers. Do not let this code to force
// the GC mode and default heap settings.
define_pd_global(bool, NeverActAsServerClassMachine, false);
define_pd_global(uint64_t,MaxRAM, 128ULL*G);
#endif
#define CI_COMPILER_COUNT 0
#else

@ -52,7 +52,7 @@ jint EpsilonHeap::initialize() {
initialize_reserved_region(heap_rs);

_space = new ContiguousSpace();
_space->initialize(committed_region, /* clear_space = */ true, /* mangle_space = */ true);
_space->initialize(committed_region, /* clear_space = */ true);

// Precompute hot fields
_max_tlab_size = MIN2(CollectedHeap::max_tlab_size(), align_object_size(EpsilonMaxTLABSize / HeapWordSize));

@ -111,7 +111,7 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
}
}

void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
void G1BarrierSet::write_region(MemRegion mr) {
if (mr.is_empty()) {
return;
}

@ -99,8 +99,7 @@ class G1BarrierSet: public CardTableBarrierSet {
template <DecoratorSet decorators, typename T>
void write_ref_field_pre(T* field);

inline void write_region(MemRegion mr);
void write_region(JavaThread* thread, MemRegion mr);
virtual void write_region(MemRegion mr);

template <DecoratorSet decorators = DECORATORS_NONE, typename T>
void write_ref_field_post(T* field);

@ -68,10 +68,6 @@ inline void G1BarrierSet::write_ref_field_pre(T* field) {
enqueue(field);
}

inline void G1BarrierSet::write_region(MemRegion mr) {
write_region(JavaThread::current(), mr);
}

template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);

@ -403,21 +403,25 @@ HeapWord* G1CollectedHeap::allocate_new_tlab(size_t min_size,
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_humongous(requested_size), "we do not allow humongous TLABs");

return attempt_allocation(min_size, requested_size, actual_size);
// Do not allow a GC because we are allocating a new TLAB to avoid an issue
// with UseGCOverheadLimit: although this GC would return null if the overhead
// limit would be exceeded, but it would likely free at least some space.
// So the subsequent outside-TLAB allocation could be successful anyway and
// the indication that the overhead limit had been exceeded swallowed.
return attempt_allocation(min_size, requested_size, actual_size, false /* allow_gc */);
}

HeapWord*
G1CollectedHeap::mem_allocate(size_t word_size) {
HeapWord* G1CollectedHeap::mem_allocate(size_t word_size) {
assert_heap_not_locked_and_not_at_safepoint();

if (is_humongous(word_size)) {
return attempt_allocation_humongous(word_size);
}
size_t dummy = 0;
return attempt_allocation(word_size, word_size, &dummy);
return attempt_allocation(word_size, word_size, &dummy, true /* allow_gc */);
}

HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_size) {
HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_size, bool allow_gc) {
ResourceMark rm; // For retrieving the thread names in log messages.

// Make sure you read the note in attempt_allocation_humongous().
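The comment in the hunk above explains why a TLAB refill must not trigger a collection: a GC on that path would likely free some space, so the following outside-TLAB allocation could succeed and the indication that the overhead limit was exceeded would be swallowed. Below is a small sketch of threading an allow_gc flag through a slow allocation path; the names and the fake heap are invented stand-ins, not the G1 implementation.

// Illustrative sketch only: an allow_gc flag on the slow allocation path.
// try_allocate() and the "GC" are pretend stand-ins for the real machinery.
#include <cstddef>
#include <cstdio>

static bool heap_has_room = false;

static void* try_allocate(size_t words) {
  return heap_has_room ? new char[words * sizeof(void*)] : nullptr;
}

static void* attempt_allocation_slow(size_t words, bool allow_gc) {
  void* result = try_allocate(words);
  if (result != nullptr) return result;
  if (!allow_gc) {
    // TLAB path: give up immediately; the caller falls back to an
    // outside-TLAB allocation which is allowed to schedule a GC.
    return nullptr;
  }
  std::puts("scheduling a collection and retrying");
  heap_has_room = true;            // pretend the GC freed space
  return try_allocate(words);
}

int main() {
  void* tlab = attempt_allocation_slow(64, /* allow_gc */ false);
  std::printf("tlab: %p\n", tlab);                       // null, no GC attempted
  void* obj = attempt_allocation_slow(64, /* allow_gc */ true);
  std::printf("obj:  %p\n", obj);                        // non-null after the "GC"
  delete[] static_cast<char*>(obj);
  return 0;
}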
@ -444,6 +448,8 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_
result = _allocator->attempt_allocation_locked(node_index, word_size);
if (result != nullptr) {
return result;
} else if (!allow_gc) {
return nullptr;
}

// Read the GC count while still holding the Heap_lock.
@ -461,8 +467,20 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu words",
Thread::current()->name(), word_size);

if (is_shutting_down()) {
stall_for_vm_shutdown();
return nullptr;
}

// Has the gc overhead limit been reached in the meantime? If so, this mutator
// should receive null even when unsuccessfully scheduling a collection as well
// for global consistency.
if (gc_overhead_limit_exceeded()) {
return nullptr;
}

// We can reach here if we were unsuccessful in scheduling a collection (because
// another thread beat us to it). In this case immeditealy retry the allocation
// another thread beat us to it). In this case immediately retry the allocation
// attempt because another thread successfully performed a collection and possibly
// reclaimed enough space. The first attempt (without holding the Heap_lock) is
// here and the follow-on attempt will be at the start of the next loop
@ -479,11 +497,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_
log_warning(gc, alloc)("%s: Retried allocation %u times for %zu words",
Thread::current()->name(), try_count, word_size);
}

if (is_shutting_down()) {
stall_for_vm_shutdown();
return nullptr;
}
}

ShouldNotReachHere();
@ -612,7 +625,8 @@ void G1CollectedHeap::dealloc_archive_regions(MemRegion range) {

inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size) {
size_t* actual_word_size,
bool allow_gc) {
assert_heap_not_locked_and_not_at_safepoint();
assert(!is_humongous(desired_word_size), "attempt_allocation() should not "
"be called for humongous allocation requests");
@ -624,7 +638,7 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,

if (result == nullptr) {
*actual_word_size = desired_word_size;
result = attempt_allocation_slow(node_index, desired_word_size);
result = attempt_allocation_slow(node_index, desired_word_size, allow_gc);
}

assert_heap_not_locked();
@ -707,6 +721,18 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size) {
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu",
Thread::current()->name(), word_size);

if (is_shutting_down()) {
stall_for_vm_shutdown();
return nullptr;
}

// Has the gc overhead limit been reached in the meantime? If so, this mutator
// should receive null even when unsuccessfully scheduling a collection as well
// for global consistency.
if (gc_overhead_limit_exceeded()) {
return nullptr;
}

// We can reach here if we were unsuccessful in scheduling a collection (because
// another thread beat us to it).
// Humongous object allocation always needs a lock, so we wait for the retry
@ -718,11 +744,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size) {
log_warning(gc, alloc)("%s: Retried allocation %u times for %zu words",
Thread::current()->name(), try_count, word_size);
}

if (is_shutting_down()) {
stall_for_vm_shutdown();
return nullptr;
}
}

ShouldNotReachHere();
@ -948,25 +969,62 @@ void G1CollectedHeap::resize_heap_after_young_collection(size_t allocation_word_
phase_times()->record_resize_heap_time((Ticks::now() - start).seconds() * 1000.0);
}

void G1CollectedHeap::update_gc_overhead_counter() {
assert(SafepointSynchronize::is_at_safepoint(), "precondition");

if (!UseGCOverheadLimit) {
return;
}

bool gc_time_over_limit = (_policy->analytics()->long_term_gc_time_ratio() * 100) >= GCTimeLimit;
double free_space_percent = percent_of(num_available_regions() * G1HeapRegion::GrainBytes, max_capacity());
bool free_space_below_limit = free_space_percent < GCHeapFreeLimit;

log_debug(gc)("GC Overhead Limit: GC Time %f Free Space %f Counter %zu",
(_policy->analytics()->long_term_gc_time_ratio() * 100),
free_space_percent,
_gc_overhead_counter);

if (gc_time_over_limit && free_space_below_limit) {
_gc_overhead_counter++;
} else {
_gc_overhead_counter = 0;
}
}

bool G1CollectedHeap::gc_overhead_limit_exceeded() {
return _gc_overhead_counter >= GCOverheadLimitThreshold;
}

HeapWord* G1CollectedHeap::satisfy_failed_allocation_helper(size_t word_size,
bool do_gc,
bool maximal_compaction,
bool expect_null_mutator_alloc_region) {
// Let's attempt the allocation first.
HeapWord* result =
attempt_allocation_at_safepoint(word_size,
expect_null_mutator_alloc_region);
if (result != nullptr) {
return result;
}
// Skip allocation if GC overhead limit has been exceeded to let the mutator run
// into an OOME. It can either exit "gracefully" or try to free up memory asap.
// For the latter situation, keep running GCs. If the mutator frees up enough
// memory quickly enough, the overhead(s) will go below the threshold(s) again
// and the VM may continue running.
// If we did not continue garbage collections, the (gc overhead) limit may decrease
// enough by itself to not count as exceeding the limit any more, in the worst
// case bouncing back-and-forth all the time.
if (!gc_overhead_limit_exceeded()) {
// Let's attempt the allocation first.
HeapWord* result =
attempt_allocation_at_safepoint(word_size,
expect_null_mutator_alloc_region);
if (result != nullptr) {
return result;
}

// In a G1 heap, we're supposed to keep allocation from failing by
// incremental pauses. Therefore, at least for now, we'll favor
// expansion over collection. (This might change in the future if we can
// do something smarter than full collection to satisfy a failed alloc.)
result = expand_and_allocate(word_size);
if (result != nullptr) {
return result;
// In a G1 heap, we're supposed to keep allocation from failing by
// incremental pauses. Therefore, at least for now, we'll favor
// expansion over collection. (This might change in the future if we can
// do something smarter than full collection to satisfy a failed alloc.)
result = expand_and_allocate(word_size);
if (result != nullptr) {
return result;
}
}

if (do_gc) {
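update_gc_overhead_counter() counts consecutive collections in which both the long-term GC time ratio is at or above GCTimeLimit and free space is below GCHeapFreeLimit; one healthy collection resets the streak, and the limit counts as exceeded once the streak reaches GCOverheadLimitThreshold. Here is a self-contained sketch of that bookkeeping; the flag names mirror the HotSpot ones, but the sample limit values and the harness are simplified assumptions.

// Illustrative sketch only: the consecutive-collection counter described above.
#include <cstdio>

static const double GCTimeLimit = 98.0;            // percent of time spent in GC
static const double GCHeapFreeLimit = 2.0;         // percent of heap still free
static const unsigned GCOverheadLimitThreshold = 5;

static unsigned gc_overhead_counter = 0;

static void update_gc_overhead_counter(double gc_time_percent, double free_space_percent) {
  bool gc_time_over_limit = gc_time_percent >= GCTimeLimit;
  bool free_space_below_limit = free_space_percent < GCHeapFreeLimit;
  if (gc_time_over_limit && free_space_below_limit) {
    gc_overhead_counter++;         // another collection over both limits
  } else {
    gc_overhead_counter = 0;       // one good collection resets the streak
  }
}

static bool gc_overhead_limit_exceeded() {
  return gc_overhead_counter >= GCOverheadLimitThreshold;
}

int main() {
  for (int i = 0; i < 6; i++) {
    update_gc_overhead_counter(99.0, 1.0);         // pathological collections
  }
  std::printf("exceeded: %s\n", gc_overhead_limit_exceeded() ? "yes" : "no");
  update_gc_overhead_counter(50.0, 40.0);          // healthy collection
  std::printf("exceeded: %s\n", gc_overhead_limit_exceeded() ? "yes" : "no");
  return 0;
}

With the sample values above, several consecutive pathological collections trip the limit, after which a failed allocation returns null instead of forcing yet another Full GC, matching the behaviour the diff adds to satisfy_failed_allocation_helper().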
@ -990,6 +1048,10 @@ HeapWord* G1CollectedHeap::satisfy_failed_allocation_helper(size_t word_size,
HeapWord* G1CollectedHeap::satisfy_failed_allocation(size_t word_size) {
assert_at_safepoint_on_vm_thread();

// Update GC overhead limits after the initial garbage collection leading to this
// allocation attempt.
update_gc_overhead_counter();

// Attempts to allocate followed by Full GC.
HeapWord* result =
satisfy_failed_allocation_helper(word_size,
@ -1021,6 +1083,10 @@ HeapWord* G1CollectedHeap::satisfy_failed_allocation(size_t word_size) {
return result;
}

if (gc_overhead_limit_exceeded()) {
log_info(gc)("GC Overhead Limit exceeded too often (%zu).", GCOverheadLimitThreshold);
}

// What else? We might try synchronous finalization later. If the total
// space available is large enough for the allocation, then a more
// complete compaction phase than we've tried so far might be
@ -1202,6 +1268,7 @@ public:

G1CollectedHeap::G1CollectedHeap() :
CollectedHeap(),
_gc_overhead_counter(0),
_service_thread(nullptr),
_periodic_gc_task(nullptr),
_free_arena_memory_task(nullptr),
@ -2481,15 +2548,15 @@ bool G1CollectedHeap::is_potential_eager_reclaim_candidate(G1HeapRegion* r) cons
}

#ifndef PRODUCT
void G1CollectedHeap::verify_region_attr_remset_is_tracked() {
void G1CollectedHeap::verify_region_attr_is_remset_tracked() {
class VerifyRegionAttrRemSet : public G1HeapRegionClosure {
public:
virtual bool do_heap_region(G1HeapRegion* r) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
bool const remset_is_tracked = g1h->region_attr(r->bottom()).remset_is_tracked();
assert(r->rem_set()->is_tracked() == remset_is_tracked,
const bool is_remset_tracked = g1h->region_attr(r->bottom()).is_remset_tracked();
assert(r->rem_set()->is_tracked() == is_remset_tracked,
"Region %u remset tracking status (%s) different to region attribute (%s)",
r->hrm_index(), BOOL_TO_STR(r->rem_set()->is_tracked()), BOOL_TO_STR(remset_is_tracked));
r->hrm_index(), BOOL_TO_STR(r->rem_set()->is_tracked()), BOOL_TO_STR(is_remset_tracked));
return false;
}
} cl;
@ -3092,9 +3159,9 @@ G1HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, G1HeapRegio
young_regions_cset_group()->add(new_alloc_region);
} else {
new_alloc_region->set_old();
update_region_attr(new_alloc_region);
}
_policy->remset_tracker()->update_at_allocate(new_alloc_region);
register_region_with_region_attr(new_alloc_region);
G1HeapRegionPrinter::alloc(new_alloc_region);
return new_alloc_region;
}

@ -169,6 +169,17 @@ class G1CollectedHeap : public CollectedHeap {
friend class G1CheckRegionAttrTableClosure;

private:
// GC Overhead Limit functionality related members.
//
// The goal is to return null for allocations prematurely (before really going
// OOME) in case both GC CPU usage (>= GCTimeLimit) and not much available free
// memory (<= GCHeapFreeLimit) so that applications can exit gracefully or try
// to keep running by easing off memory.
uintx _gc_overhead_counter; // The number of consecutive garbage collections we were over the limits.

void update_gc_overhead_counter();
bool gc_overhead_limit_exceeded();

G1ServiceThread* _service_thread;
G1ServiceTask* _periodic_gc_task;
G1MonotonicArenaFreeMemoryTask* _free_arena_memory_task;
@ -439,18 +450,14 @@ private:
//
// * If either call cannot satisfy the allocation request using the
// current allocating region, they will try to get a new one. If
// this fails, they will attempt to do an evacuation pause and
// retry the allocation.
//
// * If all allocation attempts fail, even after trying to schedule
// an evacuation pause, allocate_new_tlab() will return null,
// whereas mem_allocate() will attempt a heap expansion and/or
// schedule a Full GC.
// this fails, (only) mem_allocate() will attempt to do an evacuation
// pause and retry the allocation. Allocate_new_tlab() will return null,
// deferring to the following mem_allocate().
//
// * We do not allow humongous-sized TLABs. So, allocate_new_tlab
// should never be called with word_size being humongous. All
// humongous allocation requests should go to mem_allocate() which
// will satisfy them with a special path.
// will satisfy them in a special path.

HeapWord* allocate_new_tlab(size_t min_size,
size_t requested_size,
@ -463,12 +470,13 @@ private:
// should only be used for non-humongous allocations.
inline HeapWord* attempt_allocation(size_t min_word_size,
size_t desired_word_size,
size_t* actual_word_size);

size_t* actual_word_size,
bool allow_gc);
// Second-level mutator allocation attempt: take the Heap_lock and
// retry the allocation attempt, potentially scheduling a GC
// pause. This should only be used for non-humongous allocations.
HeapWord* attempt_allocation_slow(uint node_index, size_t word_size);
// pause if allow_gc is set. This should only be used for non-humongous
// allocations.
HeapWord* attempt_allocation_slow(uint node_index, size_t word_size, bool allow_gc);

// Takes the Heap_lock and attempts a humongous allocation. It can
// potentially schedule a GC pause.
@ -637,16 +645,17 @@ public:
size_t word_size,
bool update_remsets);

// We register a region with the fast "in collection set" test. We
// simply set to true the array slot corresponding to this region.
void register_young_region_with_region_attr(G1HeapRegion* r) {
_region_attr.set_in_young(r->hrm_index(), r->has_pinned_objects());
}
// The following methods update the region attribute table, i.e. a compact
// representation of per-region information that is regularly accessed
// during GC.
inline void register_young_region_with_region_attr(G1HeapRegion* r);
inline void register_new_survivor_region_with_region_attr(G1HeapRegion* r);
inline void register_region_with_region_attr(G1HeapRegion* r);
inline void register_old_region_with_region_attr(G1HeapRegion* r);
inline void register_old_collection_set_region_with_region_attr(G1HeapRegion* r);
inline void register_optional_region_with_region_attr(G1HeapRegion* r);

// Updates region state without overwriting the type in the region attribute table.
inline void update_region_attr(G1HeapRegion* r);

void clear_region_attr(const G1HeapRegion* hr) {
_region_attr.clear(hr);
}
@ -657,7 +666,7 @@ public:

// Verify that the G1RegionAttr remset tracking corresponds to actual remset tracking
// for all regions.
void verify_region_attr_remset_is_tracked() PRODUCT_RETURN;
void verify_region_attr_is_remset_tracked() PRODUCT_RETURN;

void clear_bitmap_for_region(G1HeapRegion* hr);


@ -191,18 +191,26 @@ void G1CollectedHeap::register_humongous_candidate_region_with_region_attr(uint
_region_attr.set_humongous_candidate(index);
}

void G1CollectedHeap::register_new_survivor_region_with_region_attr(G1HeapRegion* r) {
_region_attr.set_new_survivor_region(r->hrm_index());
void G1CollectedHeap::register_young_region_with_region_attr(G1HeapRegion* r) {
assert(!is_in_cset(r), "should not already be registered as in collection set");
_region_attr.set_in_young(r->hrm_index(), r->has_pinned_objects());
}

void G1CollectedHeap::register_region_with_region_attr(G1HeapRegion* r) {
_region_attr.set_remset_is_tracked(r->hrm_index(), r->rem_set()->is_tracked());
void G1CollectedHeap::register_new_survivor_region_with_region_attr(G1HeapRegion* r) {
assert(!is_in_cset(r), "should not already be registered as in collection set");
_region_attr.set_new_survivor_region(r->hrm_index(), r->has_pinned_objects());
}

void G1CollectedHeap::update_region_attr(G1HeapRegion* r) {
_region_attr.set_is_remset_tracked(r->hrm_index(), r->rem_set()->is_tracked());
_region_attr.set_is_pinned(r->hrm_index(), r->has_pinned_objects());
}

void G1CollectedHeap::register_old_region_with_region_attr(G1HeapRegion* r) {
void G1CollectedHeap::register_old_collection_set_region_with_region_attr(G1HeapRegion* r) {
assert(!is_in_cset(r), "should not already be registered as in collection set");
assert(r->is_old(), "must be");
assert(r->rem_set()->is_complete(), "must be");
_region_attr.set_in_old(r->hrm_index(), true);
_region_attr.set_in_old(r->hrm_index(), true, r->has_pinned_objects());
_rem_set->exclude_region_from_scan(r->hrm_index());
}


@ -126,8 +126,7 @@ void G1CollectionSet::add_old_region(G1HeapRegion* hr) {

assert(!hr->rem_set()->has_cset_group(), "Should have already uninstalled group remset");

assert(!hr->in_collection_set(), "should not already be in the collection set");
_g1h->register_old_region_with_region_attr(hr);
_g1h->register_old_collection_set_region_with_region_attr(hr);

assert(_regions_cur_length < _regions_max_length, "Collection set now larger than maximum size.");
_regions[_regions_cur_length++] = hr->hrm_index();
@ -724,7 +723,7 @@ bool G1CollectionSet::finalize_optional_for_evacuation(double remaining_pause_ti

stop_incremental_building();

_g1h->verify_region_attr_remset_is_tracked();
_g1h->verify_region_attr_is_remset_tracked();

return num_regions_selected > 0;
}
@ -736,7 +735,7 @@ void G1CollectionSet::abandon_optional_collection_set(G1ParScanThreadStateSet* p
// Clear collection set marker and make sure that the remembered set information
// is correct as we still need it later.
_g1h->clear_region_attr(r);
_g1h->register_region_with_region_attr(r);
_g1h->update_region_attr(r);
r->clear_index_in_opt_cset();
};

@ -745,7 +744,7 @@ void G1CollectionSet::abandon_optional_collection_set(G1ParScanThreadStateSet* p
_optional_groups.remove_selected(_optional_groups.length(), _optional_groups.num_regions());
}

_g1h->verify_region_attr_remset_is_tracked();
_g1h->verify_region_attr_is_remset_tracked();
}

#ifdef ASSERT

@ -33,15 +33,15 @@
struct G1HeapRegionAttr {
public:
typedef int8_t region_type_t;
// remset_is_tracked_t is essentially bool, but we need precise control
// is_remset_tracked_t is essentially bool, but we need precise control
// on the size, and sizeof(bool) is implementation specific.
typedef uint8_t remset_is_tracked_t;
typedef uint8_t is_remset_tracked_t;
// _is_pinned_t is essentially bool, but we want precise control
// on the size, and sizeof(bool) is implementation specific.
typedef uint8_t is_pinned_t;

private:
remset_is_tracked_t _remset_is_tracked;
is_remset_tracked_t _is_remset_tracked;
region_type_t _type;
is_pinned_t _is_pinned;

@ -63,8 +63,8 @@ public:
static const region_type_t Old = 1; // The region is in the collection set and an old region.
static const region_type_t Num = 2;

G1HeapRegionAttr(region_type_t type = NotInCSet, bool remset_is_tracked = false, bool is_pinned = false) :
_remset_is_tracked(remset_is_tracked ? 1 : 0), _type(type), _is_pinned(is_pinned ? 1 : 0) {
G1HeapRegionAttr(region_type_t type = NotInCSet, bool is_remset_tracked = false, bool is_pinned = false) :
_is_remset_tracked(is_remset_tracked ? 1 : 0), _type(type), _is_pinned(is_pinned ? 1 : 0) {
assert(is_valid(), "Invalid type %d", _type);
}

@ -82,9 +82,8 @@ public:
}
}

bool remset_is_tracked() const { return _remset_is_tracked != 0; }
bool is_remset_tracked() const { return _is_remset_tracked != 0; }

void set_new_survivor() { _type = NewSurvivor; }
bool is_pinned() const { return _is_pinned != 0; }

void set_old() { _type = Old; }
@ -93,7 +92,7 @@ public:
_type = NotInCSet;
}

void set_remset_is_tracked(bool value) { _remset_is_tracked = value ? 1 : 0; }
void set_is_remset_tracked(bool value) { _is_remset_tracked = value ? 1 : 0; }
void set_is_pinned(bool value) { _is_pinned = value ? 1 : 0; }

bool is_in_cset_or_humongous_candidate() const { return is_in_cset() || is_humongous_candidate(); }
@ -126,26 +125,26 @@ class G1HeapRegionAttrBiasedMappedArray : public G1BiasedMappedArray<G1HeapRegio
protected:
G1HeapRegionAttr default_value() const { return G1HeapRegionAttr(G1HeapRegionAttr::NotInCSet); }
public:
void set_optional(uintptr_t index, bool remset_is_tracked) {
void set_optional(uintptr_t index, bool is_remset_tracked) {
assert(get_by_index(index).is_default(),
"Region attributes at index " INTPTR_FORMAT " should be default but is %s", index, get_by_index(index).get_type_str());
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::Optional, remset_is_tracked));
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::Optional, is_remset_tracked));
}

void set_new_survivor_region(uintptr_t index) {
void set_new_survivor_region(uintptr_t index, bool region_is_pinned) {
assert(get_by_index(index).is_default(),
"Region attributes at index " INTPTR_FORMAT " should be default but is %s", index, get_by_index(index).get_type_str());
get_ref_by_index(index)->set_new_survivor();
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::NewSurvivor, true, region_is_pinned));
}

void set_humongous_candidate(uintptr_t index) {
assert(get_by_index(index).is_default(),
"Region attributes at index " INTPTR_FORMAT " should be default but is %s", index, get_by_index(index).get_type_str());
// Humongous candidates must have complete remset.
const bool remset_is_tracked = true;
const bool is_remset_tracked = true;
// Humongous candidates can not be pinned.
const bool region_is_pinned = false;
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::HumongousCandidate, remset_is_tracked, region_is_pinned));
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::HumongousCandidate, is_remset_tracked, region_is_pinned));
}

void clear_humongous_candidate(uintptr_t index) {
@ -156,8 +155,8 @@ class G1HeapRegionAttrBiasedMappedArray : public G1BiasedMappedArray<G1HeapRegio
return get_ref_by_index(index)->is_humongous_candidate();
}

void set_remset_is_tracked(uintptr_t index, bool remset_is_tracked) {
get_ref_by_index(index)->set_remset_is_tracked(remset_is_tracked);
void set_is_remset_tracked(uintptr_t index, bool is_remset_tracked) {
get_ref_by_index(index)->set_is_remset_tracked(is_remset_tracked);
}

void set_is_pinned(uintptr_t index, bool is_pinned) {
@ -170,12 +169,10 @@ class G1HeapRegionAttrBiasedMappedArray : public G1BiasedMappedArray<G1HeapRegio
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::Young, true, is_pinned));
}

void set_in_old(uintptr_t index, bool remset_is_tracked) {
void set_in_old(uintptr_t index, bool is_remset_tracked, bool is_pinned) {
assert(get_by_index(index).is_default(),
"Region attributes at index " INTPTR_FORMAT " should be default but is %s", index, get_by_index(index).get_type_str());
// We do not select regions with pinned objects into the collection set.
const bool region_is_pinned = false;
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::Old, remset_is_tracked, region_is_pinned));
set_by_index(index, G1HeapRegionAttr(G1HeapRegionAttr::Old, is_remset_tracked, is_pinned));
}

bool is_in_cset_or_humongous_candidate(HeapWord* addr) const { return at(addr).is_in_cset_or_humongous_candidate(); }

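G1HeapRegionAttr keeps the per-region attributes in explicitly sized fields (uint8_t rather than bool) because sizeof(bool) is implementation specific and the record is read very frequently during GC through a biased array indexed by region. The following is a compact stand-alone sketch of that idea, not the real type:

// Illustrative sketch only: a per-region attribute record packed into a few
// bytes, using uint8_t instead of bool for a guaranteed layout.
#include <cstdint>
#include <cstdio>

struct RegionAttr {
  typedef int8_t  region_type_t;
  typedef uint8_t flag_t;          // essentially bool, but with a fixed size

  flag_t        is_remset_tracked;
  region_type_t type;
  flag_t        is_pinned;

  RegionAttr(region_type_t t = -1, bool remset = false, bool pinned = false)
    : is_remset_tracked(remset ? 1 : 0), type(t), is_pinned(pinned ? 1 : 0) {}
};

// The whole record stays small so a table indexed by region can be scanned
// cheaply on the hot GC paths.
static_assert(sizeof(RegionAttr) <= 4, "region attribute record should stay compact");

int main() {
  RegionAttr old_region(/* type */ 1, /* remset */ true, /* pinned */ false);
  std::printf("size=%zu tracked=%u\n", sizeof(RegionAttr), (unsigned)old_region.is_remset_tracked);
  return 0;
}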
Some files were not shown because too many files have changed in this diff.