diff --git a/src/hotspot/cpu/s390/assembler_s390.hpp b/src/hotspot/cpu/s390/assembler_s390.hpp index a0a86a707dd..0a138151ace 100644 --- a/src/hotspot/cpu/s390/assembler_s390.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2022 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -843,6 +843,10 @@ class Assembler : public AbstractAssembler { #define CY_ZOPC (unsigned long)(227L << 40 | 89L) #define CGF_ZOPC (unsigned long)(227L << 40 | 48L) #define CG_ZOPC (unsigned long)(227L << 40 | 32L) +// MI, signed +#define CHHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x54L << 32) +#define CHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x5cL << 32) +#define CGHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x58L << 32) // RR, unsigned #define CLR_ZOPC (unsigned int)(21 << 8) #define CLGFR_ZOPC (unsigned int)(185 << 24 | 49 << 16) @@ -855,6 +859,10 @@ class Assembler : public AbstractAssembler { #define CLY_ZOPC (unsigned long)(227L << 40 | 85L) #define CLGF_ZOPC (unsigned long)(227L << 40 | 49L) #define CLG_ZOPC (unsigned long)(227L << 40 | 33L) +// MI, unsigned +#define CLHHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x55L << 32) +#define CLFHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x5dL << 32) +#define CLGHSI_ZOPC (unsigned long)(0xe5L << 40 | 0x59L << 32) // RI, unsigned #define TMHH_ZOPC (unsigned int)(167 << 24 | 2 << 16) #define TMHL_ZOPC (unsigned int)(167 << 24 | 3 << 16) @@ -1060,6 +1068,7 @@ class Assembler : public AbstractAssembler { #define MVI_ZOPC (unsigned int)(0x92 << 24) #define MVIY_ZOPC (unsigned long)(0xebL << 40 | 0x52L) #define MVC_ZOPC (unsigned long)(0xd2L << 40) +#define MVCIN_ZOPC (unsigned long)(0xe8L 
<< 40) #define MVCL_ZOPC (unsigned int)(0x0e << 8) #define MVCLE_ZOPC (unsigned int)(0xa8 << 24) @@ -1708,21 +1717,21 @@ class Assembler : public AbstractAssembler { // unsigned immediate, in low bits, nbits long static long uimm(long x, int nbits) { - assert(Immediate::is_uimm(x, nbits), "unsigned constant out of range"); + assert(Immediate::is_uimm(x, nbits), "unsigned immediate " INTPTR_FORMAT " out of range (%d bits)", x, nbits); return x & fmask(nbits - 1, 0); } // Cast '1' to long to avoid sign extension if nbits = 32. // signed immediate, in low bits, nbits long static long simm(long x, int nbits) { - assert(Immediate::is_simm(x, nbits), "value out of range"); + assert(Immediate::is_simm(x, nbits), "signed immediate " INTPTR_FORMAT " out of range (%d bits)", x, nbits); return x & fmask(nbits - 1, 0); } static long imm(int64_t x, int nbits) { // Assert that x can be represented with nbits bits ignoring the sign bits, // i.e. the more higher bits should all be 0 or 1. - assert((x >> nbits) == 0 || (x >> nbits) == -1, "value out of range"); + assert((x >> nbits) == 0 || (x >> nbits) == -1, "signed immediate " INTPTR_FORMAT " out of range (%d bits)", x, nbits); return x & fmask(nbits-1, 0); } @@ -1734,7 +1743,7 @@ class Assembler : public AbstractAssembler { // contents of the DH field to the left of the contents of // the DL field. 
static long simm20(int64_t ui20) { - assert(Immediate::is_simm(ui20, 20), "value out of range"); + assert(Immediate::is_simm(ui20, 20), "signed displacement (disp20) " INTPTR_FORMAT " out of range", ui20); return ( ((ui20 & 0xfffL) << (48-32)) | // DL (((ui20 >> 12) & 0xffL) << (48-40))); // DH } @@ -1847,6 +1856,10 @@ class Assembler : public AbstractAssembler { //inline void z_cgf(Register r1,int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; int64 <--> int32 inline void z_cg( Register r1, const Address &a); // compare (r1, *(a)) ; int64 inline void z_cg( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2)) ; int64 + // compare memory - immediate + inline void z_chhsi(int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; int16 + inline void z_chsi( int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; int32 + inline void z_cghsi(int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; int64 // compare logical instructions // compare register @@ -1862,6 +1875,10 @@ class Assembler : public AbstractAssembler { inline void z_cly( Register r1, const Address& a); // compare (r1, *(a)) ; uint32 inline void z_clg( Register r1, const Address &a); // compare (r1, *(a) ; uint64 inline void z_clg( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2) ; uint64 + // compare memory - immediate + inline void z_clhhsi(int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; uint16 + inline void z_clfhsi(int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; uint32 + inline void z_clghsi(int64_t d1, Register b1, int64_t i2); // compare (*d1(b1), i2_imm16) ; uint64 // test under mask inline void z_tmll(Register r1, int64_t i2); // test under mask, see docu @@ -2435,6 +2452,7 @@ class Assembler : public AbstractAssembler { inline void z_mvc(const Address& d, const Address& s, int64_t l); // move l bytes inline void 
z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes + inline void z_mvcin(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // move region of memory inline void z_stfle(int64_t d2, Register b2); // store facility list extended diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp index 2eb6cfb812c..126dd83ee22 100644 --- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2022 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -277,7 +277,8 @@ inline void Assembler::z_mvc(const Address& d, const Address& s, int64_t l) { assert(!d.has_index() && !s.has_index(), "Address operand can not be encoded."); z_mvc(d.disp(), l-1, d.base(), s.disp(), s.base()); } -inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); } +inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); } +inline void Assembler::z_mvcin(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVCIN_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); } inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | rsmaskt_32(d2, b2)); } inline void 
Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | rsmask_48( d1, b1) | simm16(i2, 32, 48)); } @@ -647,6 +648,9 @@ inline void Assembler::z_ch( Register r1, const Address &a) { z_ch(r1, a.disp() inline void Assembler::z_c( Register r1, const Address &a) { z_c( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } inline void Assembler::z_cy( Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } inline void Assembler::z_cg( Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_chhsi(int64_t d1, Register b1, int64_t i2) { emit_48( CHHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } +inline void Assembler::z_chsi( int64_t d1, Register b1, int64_t i2) { emit_48( CHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } +inline void Assembler::z_cghsi(int64_t d1, Register b1, int64_t i2) { emit_48( CGHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } inline void Assembler::z_clfi( Register r1, int64_t i2) { emit_48( CLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } @@ -657,6 +661,9 @@ inline void Assembler::z_clg( Register r1, int64_t d2, Register x2, Register b2 inline void Assembler::z_cl( Register r1, const Address &a) { z_cl( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } inline void Assembler::z_cly( Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } inline void Assembler::z_clg( Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_clhhsi(int64_t d1, Register b1, int64_t i2) { emit_48( CLHHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } +inline void Assembler::z_clfhsi(int64_t d1, Register b1, int64_t i2) { emit_48( CLFHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } +inline void Assembler::z_clghsi(int64_t d1, Register b1, int64_t i2) { emit_48( CLGHSI_ZOPC | rsmask_48(d1, b1) | simm16(i2, 32, 48)); } inline void Assembler::z_clc(int64_t d1, int64_t l, 
Register b1, int64_t d2, Register b2) { emit_48( CLC_ZOPC | uimm8(l, 8, 48) | rsmask_48(d1, b1) | rsmask_SS(d2, b2)); } inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | rsmaskt_32( d2, b2)); } @@ -772,7 +779,6 @@ inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Reg inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t ix3){emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(ix3, 32, 48)); } - // Gather/Scatter inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t ix3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | uimm4(ix3, 32, 48)); } inline void Assembler::z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t ix3) {emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | uimm4(ix3, 32, 48)); } @@ -1378,7 +1384,7 @@ inline void Assembler::z_brz( Label& L) { z_brc(bcondZero, target(L)); } inline void Assembler::z_brnz( Label& L) { z_brc(bcondNotZero, target(L)); } inline void Assembler::z_braz( Label& L) { z_brc(bcondAllZero, target(L)); } inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero, target(L)); } -inline void Assembler::z_brnp( Label& L) { z_brc( bcondNotPositive, target( L)); } +inline void Assembler::z_brnp( Label& L) { z_brc(bcondNotPositive, target( L)); } inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne, target(L)); } inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero, target(L)); } inline void Assembler::z_bvat( Label& L) { z_brc(bcondVAlltrue, target(L)); } diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 
60e67ed2ee8..54d79deb0c4 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2022 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1052,29 +1052,47 @@ int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_le } #endif -// addr: Address descriptor of memory to clear index register will not be used ! +// addr: Address descriptor of memory to clear. Index register will not be used! // size: Number of bytes to clear. +// condition code will not be preserved. // !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!! // !!! Use store_const() instead !!! -void MacroAssembler::clear_mem(const Address& addr, unsigned size) { - guarantee(size <= 256, "MacroAssembler::clear_mem: size too large"); - - if (size == 1) { - z_mvi(addr, 0); - return; - } +void MacroAssembler::clear_mem(const Address& addr, unsigned int size) { + guarantee((addr.disp() + size) <= 4096, "MacroAssembler::clear_mem: size too large"); switch (size) { - case 2: z_mvhhi(addr, 0); + case 0: return; - case 4: z_mvhi(addr, 0); + case 1: + z_mvi(addr, 0); return; - case 8: z_mvghi(addr, 0); + case 2: + z_mvhhi(addr, 0); + return; + case 4: + z_mvhi(addr, 0); + return; + case 8: + z_mvghi(addr, 0); return; default: ; // Fallthru to xc. 
} - z_xc(addr, size, addr); + // Caution: the emitter with Address operands does implicitly decrement the length + if (size <= 256) { + z_xc(addr, size, addr); + } else { + unsigned int offset = addr.disp(); + unsigned int incr = 256; + for (unsigned int i = 0; i <= size-incr; i += incr) { + z_xc(offset, incr - 1, addr.base(), offset, addr.base()); + offset += incr; + } + unsigned int rest = size - (offset - addr.disp()); + if (rest > 0) { + z_xc(offset, rest-1, addr.base(), offset, addr.base()); + } + } } void MacroAssembler::align(int modulus) { diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index e0e9dbd929f..9ef4a3313b2 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2022 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,35 +60,40 @@ #define BLOCK_COMMENT(str) if (PrintAssembly || PrintStubCode) __ block_comment(str) #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -// These static, partially const, variables are for the AES intrinsics. -// They are declared/initialized here to make them available across function bodies. -#if defined(JIT_TIMER) - static const int JIT_TIMER_space = 8; // extra space for JIT_TIMER data -#else - static const int JIT_TIMER_space = 0; -#endif - static const int AES_parmBlk_align = 32; // octoword alignment. + // These static, partially const, variables are for the AES intrinsics. + // They are declared/initialized here to make them available across function bodies.
- static int AES_ctrVal_len = 0; // ctr init value len (in bytes), expected: length of dataBlk (16) - static int AES_ctrVec_len = 0; // # of ctr vector elements. That many block can be ciphered with one instruction execution - static int AES_ctrArea_len = 0; // reserved stack space (in bytes) for ctr (= ctrVal_len * ctrVec_len) + static const int AES_parmBlk_align = 32; // octoword alignment. + static const int AES_stackSpace_incr = AES_parmBlk_align; // add'l stack space is allocated in such increments. + // Must be multiple of AES_parmBlk_align. - static int AES_parmBlk_addspace = 0; // Must be multiple of AES_parmblk_align. - // Will be set by stub generator to stub specific value. - static int AES_dataBlk_space = 0; // Must be multiple of AES_parmblk_align. - // Will be set by stub generator to stub specific value. + static int AES_ctrVal_len = 0; // ctr init value len (in bytes), expected: length of dataBlk (16) + static int AES_ctrVec_len = 0; // # of ctr vector elements. That many block can be ciphered with one instruction execution + static int AES_ctrArea_len = 0; // reserved stack space (in bytes) for ctr (= ctrVal_len * ctrVec_len) - static const int keylen_offset = -1; - static const int fCode_offset = -2; - static const int ctrVal_len_offset = -4; - static const int msglen_offset = -8; - static const int unextSP_offset = -16; - static const int remmsg_len_offset = -20; - static const int argsave_offset = -2*AES_parmBlk_align; - static const int localSpill_offset = argsave_offset + 24; // arg2..arg4 are saved + static int AES_parmBlk_addspace = 0; // Must be multiple of AES_parmblk_align. + // Will be set by stub generator to stub specific value. + static int AES_dataBlk_space = 0; // Must be multiple of AES_parmblk_align. + // Will be set by stub generator to stub specific value. + static int AES_dataBlk_offset = 0; // offset of the local src and dst dataBlk buffers + // Will be set by stub generator to stub specific value. 
-// ----------------------------------------------------------------------- + // These offsets are relative to the parameter block address (Register parmBlk = Z_R1) + static const int keylen_offset = -1; + static const int fCode_offset = -2; + static const int ctrVal_len_offset = -4; + static const int msglen_offset = -8; + static const int unextSP_offset = -16; + static const int rem_msgblk_offset = -20; + static const int argsave_offset = -2*AES_parmBlk_align; + static const int regsave_offset = -4*AES_parmBlk_align; // save space for work regs (Z_R10..13) + static const int msglen_red_offset = regsave_offset + AES_parmBlk_align; // reduced len after preLoop; + static const int counter_offset = msglen_red_offset+8; // current counter vector position. + static const int localSpill_offset = argsave_offset + 24; // arg2..arg4 are saved + + + // ----------------------------------------------------------------------- // Stub Code definitions class StubGenerator: public StubCodeGenerator { @@ -1859,7 +1864,8 @@ class StubGenerator: public StubCodeGenerator { return __ addr_at(start_off); } -// ***************************************************************************** + + // ***************************************************************************** // AES CounterMode // Push a parameter block for the cipher/decipher instruction on the stack. @@ -1867,8 +1873,6 @@ class StubGenerator: public StubCodeGenerator { // // | | // +--------+ <-- SP before expansion - // | | JIT_TIMER timestamp buffer, only if JIT_TIMER is defined. - // +--------+ // | | // : : alignment loss (part 2), 0..(AES_parmBlk_align-1) bytes. // | | @@ -1877,7 +1881,8 @@ class StubGenerator: public StubCodeGenerator { // : : byte[] ctr - kmctr expects a counter vector the size of the input vector. // : : The interface only provides byte[16] iv, the init vector. // : : The size of this area is a tradeoff between stack space, init effort, and speed. - // | | Each counter is a 128bit int. 
Vector element i is formed by incrementing element (i-1). + // | | Each counter is a 128bit int. Vector element [0] is a copy of iv. + // | | Vector element [i] is formed by incrementing element [i-1]. // +--------+ <-- ctr = parmBlk + parmBlk_len // | | // : : space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_G @@ -1920,7 +1925,17 @@ class StubGenerator: public StubCodeGenerator { // parmBlk-40 free spill slot, used for local spills. // parmBlk-64 ARG2(dst) ptr spill slot // parmBlk-56 ARG3(crypto key) ptr spill slot - // parmBlk-48 ARG4(counter value) ptr spill slot + // parmBlk-48 ARG4(icv value) ptr spill slot + // + // parmBlk-72 + // parmBlk-80 + // parmBlk-88 counter vector current position + // parmBlk-96 reduced msg len (after preLoop processing) + // + // parmBlk-104 Z_R13 spill slot (preLoop only) + // parmBlk-112 Z_R12 spill slot (preLoop only) + // parmBlk-120 Z_R11 spill slot (preLoop only) + // parmBlk-128 Z_R10 spill slot (preLoop only) // // // Layout of the parameter block (instruction KMCTR, function KMCTR-AES* @@ -1991,10 +2006,24 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("increment ctrVector counterMode_AESCrypt {"); __ add2reg(counter, AES_parmBlk_align, parmBlk); // ptr to counter array needs to be restored - for (int j = 0; j < AES_ctrVec_len; j++) { - int offset = j * AES_ctrVal_len; + + if (v0_only) { + int offset = 0; generate_increment128(counter, offset, AES_ctrVec_len, scratch); // increment iv by # vector elements - if (v0_only) break; + } else { + int j = 0; + if (VM_Version::has_VectorFacility()) { + bool first_call = true; + for (; j < (AES_ctrVec_len - 3); j+=4) { // increment blocks of 4 iv elements + int offset = j * AES_ctrVal_len; + generate_increment128x4(counter, offset, AES_ctrVec_len, first_call); + first_call = false; + } + } + for (; j < AES_ctrVec_len; j++) { + int offset = j * AES_ctrVal_len; + generate_increment128(counter, offset, AES_ctrVec_len, scratch); // increment iv by # 
vector elements + } } BLOCK_COMMENT("} increment ctrVector counterMode_AESCrypt"); @@ -2014,29 +2043,62 @@ class StubGenerator: public StubCodeGenerator { __ z_stg(scratch, Address(counter, offset)); // store back } - void generate_counterMode_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode, + void generate_increment128(Register counter, int offset, Register increment, Register scratch) { + __ clear_reg(scratch); // prepare to add carry to high-order DW + __ z_alg(increment, Address(counter, offset + 8)); // increment low order DW + __ z_stg(increment, Address(counter, offset + 8)); // store back + __ z_alcg(scratch, Address(counter, offset)); // add carry to high-order DW + __ z_stg(scratch, Address(counter, offset)); // store back + } + + // This is the vector variant of increment128, incrementing 4 ctr vector elements per call. + void generate_increment128x4(Register counter, int offset, int increment, bool init) { + VectorRegister Vincr = Z_V16; + VectorRegister Vctr0 = Z_V20; + VectorRegister Vctr1 = Z_V21; + VectorRegister Vctr2 = Z_V22; + VectorRegister Vctr3 = Z_V23; + + // Initialize the increment value only once for a series of increments. + // It must be assured that the non-initializing generator calls are + // immediately subsequent. Otherwise, there is no guarantee for Vincr to be unchanged. 
+ if (init) { + __ z_vzero(Vincr); // preset VReg with constant increment + __ z_vleih(Vincr, increment, 7); // rightmost HW has ix = 7 + } + + __ z_vlm(Vctr0, Vctr3, offset, counter); // get the counter values + __ z_vaq(Vctr0, Vctr0, Vincr); // increment them + __ z_vaq(Vctr1, Vctr1, Vincr); + __ z_vaq(Vctr2, Vctr2, Vincr); + __ z_vaq(Vctr3, Vctr3, Vincr); + __ z_vstm(Vctr0, Vctr3, offset, counter); // store the counter values + } + + unsigned int generate_counterMode_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode, Register parmBlk, Register msglen, Register fCode, Register key) { // space for data blocks (src and dst, one each) for partial block processing) - AES_dataBlk_space = roundup(2*dataBlk_len, AES_parmBlk_align); - AES_parmBlk_addspace = AES_parmBlk_align // spill space (temp data) - + AES_parmBlk_align // for argument save/restore + AES_parmBlk_addspace = AES_stackSpace_incr // spill space (temp data) + + AES_stackSpace_incr // for argument save/restore + + AES_stackSpace_incr*2 // for work reg save/restore ; - const int key_len = parmBlk_len; // The length of the unextended key (16, 24, 32) + AES_dataBlk_space = roundup(2*dataBlk_len, AES_parmBlk_align); + AES_dataBlk_offset = -(AES_parmBlk_addspace+AES_dataBlk_space); + const int key_len = parmBlk_len; // The length of the unextended key (16, 24, 32) assert((AES_ctrVal_len == 0) || (AES_ctrVal_len == dataBlk_len), "varying dataBlk_len is not supported."); - AES_ctrVal_len = dataBlk_len; // ctr init value len (in bytes) - AES_ctrArea_len = AES_ctrVec_len * AES_ctrVal_len; // space required on stack for ctr vector + AES_ctrVal_len = dataBlk_len; // ctr init value len (in bytes) + AES_ctrArea_len = AES_ctrVec_len * AES_ctrVal_len; // space required on stack for ctr vector // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize. // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space. 
- const int resize_len = JIT_TIMER_space // timestamp storage for JIT_TIMER - + AES_parmBlk_align // room for alignment of parmBlk - + AES_parmBlk_align // extra room for alignment - + AES_dataBlk_space // one src and one dst data blk - + AES_parmBlk_addspace // spill space for local data + const int resize_len = AES_parmBlk_align // room for alignment of parmBlk + + AES_parmBlk_align // extra room for alignment + + AES_dataBlk_space // one src and one dst data blk + + AES_parmBlk_addspace // spill space for local data + roundup(parmBlk_len, AES_parmBlk_align) // aligned length of parmBlk - + AES_ctrArea_len // stack space for ctr vector + + AES_ctrArea_len // stack space for ctr vector ; Register scratch = fCode; // We can use fCode as a scratch register. It's contents on entry // is irrelevant and it is set at the very end of this code block. @@ -2050,47 +2112,94 @@ class StubGenerator: public StubCodeGenerator { // alignment waste in addspace and/or in the gap area. // After resize_frame, scratch contains the frame pointer. __ resize_frame(-resize_len, scratch, true); +#ifdef ASSERT + __ clear_mem(Address(Z_SP, (intptr_t)8), resize_len - 8); +#endif // calculate aligned parmBlk address from updated (resized) SP. - __ add2reg(parmBlk, AES_parmBlk_addspace + (AES_parmBlk_align-1), Z_SP); + __ add2reg(parmBlk, AES_parmBlk_addspace + AES_dataBlk_space + (2*AES_parmBlk_align-1), Z_SP); __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. // There is room to spill stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk). __ z_mviy(keylen_offset, parmBlk, key_len - 1); // Spill crypto key length for later use. Decrement by one for direct use with xc template. __ z_mviy(fCode_offset, parmBlk, crypto_fCode); // Crypto function code, will be loaded into Z_R0 later. __ z_sty(msglen, msglen_offset, parmBlk); // full plaintext/ciphertext len. + __ z_sty(msglen, msglen_red_offset, parmBlk); // save for main loop, may get updated in preLoop. 
__ z_sra(msglen, exact_log2(dataBlk_len)); // # full cipher blocks that can be formed from input text. - __ z_sty(msglen, remmsg_len_offset, parmBlk); + __ z_sty(msglen, rem_msgblk_offset, parmBlk); __ add2reg(scratch, resize_len, Z_SP); // calculate (SP before resize) from resized SP. __ z_stg(scratch, unextSP_offset, parmBlk); // Spill unextended SP for easy revert. + __ z_stmg(Z_R10, Z_R13, regsave_offset, parmBlk); // make some regs available as work registers // Fill parmBlk with all required data __ z_mvc(0, key_len-1, parmBlk, 0, key); // Copy key. Need to do it here - key_len is only known here. BLOCK_COMMENT(err_msg("} push_Block (%d bytes) counterMode_AESCrypt%d", resize_len, parmBlk_len*8)); + return resize_len; } void generate_counterMode_pop_Block(Register parmBlk, Register msglen, Label& eraser) { // For added safety, clear the stack area where the crypto key was stored. Register scratch = msglen; - assert_different_registers(scratch, Z_R0); // can't use Z_R0 for exrl. + assert_different_registers(scratch, Z_R0); // can't use Z_R0 for exrl. // wipe out key on stack - __ z_llgc(scratch, keylen_offset, parmBlk); // get saved (key_len-1) value (we saved just one byte!) - __ z_exrl(scratch, eraser); // template relies on parmBlk still pointing to key on stack + __ z_llgc(scratch, keylen_offset, parmBlk); // get saved (key_len-1) value (we saved just one byte!) + __ z_exrl(scratch, eraser); // template relies on parmBlk still pointing to key on stack // restore argument registers. // ARG1(from) is Z_RET as well. Not restored - will hold return value anyway. // ARG5(msglen) is restored further down. 
__ z_lmg(Z_ARG2, Z_ARG4, argsave_offset, parmBlk); - __ z_lgf(msglen, msglen_offset, parmBlk); // Restore msglen, only low order FW is valid - __ z_lg(Z_SP, unextSP_offset, parmBlk); // trim stack back to unextended size + // restore work registers + __ z_lmg(Z_R10, Z_R13, regsave_offset, parmBlk); // make some regs available as work registers + + __ z_lgf(msglen, msglen_offset, parmBlk); // Restore msglen, only low order FW is valid +#ifdef ASSERT + { + Label skip2last, skip2done; + // Z_RET (aka Z_R2) can be used as scratch as well. It will be set from msglen before return. + __ z_lgr(Z_RET, Z_SP); // save extended SP + __ z_lg(Z_SP, unextSP_offset, parmBlk); // trim stack back to unextended size + __ z_sgrk(Z_R1, Z_SP, Z_RET); + + __ z_cghi(Z_R1, 256); + __ z_brl(skip2last); + __ z_xc(0, 255, Z_RET, 0, Z_RET); + __ z_aghi(Z_RET, 256); + __ z_aghi(Z_R1, -256); + + __ z_cghi(Z_R1, 256); + __ z_brl(skip2last); + __ z_xc(0, 255, Z_RET, 0, Z_RET); + __ z_aghi(Z_RET, 256); + __ z_aghi(Z_R1, -256); + + __ z_cghi(Z_R1, 256); + __ z_brl(skip2last); + __ z_xc(0, 255, Z_RET, 0, Z_RET); + __ z_aghi(Z_RET, 256); + __ z_aghi(Z_R1, -256); + + __ bind(skip2last); + __ z_lgr(Z_R0, Z_RET); + __ z_aghik(Z_RET, Z_R1, -1); // decrement for exrl + __ z_brl(skip2done); + __ z_lgr(parmBlk, Z_R0); // parmBlk == Z_R1, used in eraser template + __ z_exrl(Z_RET, eraser); + + __ bind(skip2done); + } +#else + __ z_lg(Z_SP, unextSP_offset, parmBlk); // trim stack back to unextended size +#endif } - void generate_counterMode_push_parmBlk(Register parmBlk, Register msglen, Register fCode, Register key, bool is_decipher) { + int generate_counterMode_push_parmBlk(Register parmBlk, Register msglen, Register fCode, Register key, bool is_decipher) { + int resize_len = 0; int mode = is_decipher ? 
VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set; Register keylen = fCode; // Expanded key length, as read from key array, Temp only. @@ -2108,7 +2217,7 @@ class StubGenerator: public StubCodeGenerator { if (VM_Version::has_Crypto_AES_CTR128()) { __ bind(parmBlk_128); - generate_counterMode_push_Block(VM_Version::Cipher::_AES128_dataBlk, + resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES128_dataBlk, VM_Version::Cipher::_AES128_parmBlk_G, VM_Version::Cipher::_AES128 + mode, parmBlk, msglen, fCode, key); @@ -2119,7 +2228,7 @@ class StubGenerator: public StubCodeGenerator { if (VM_Version::has_Crypto_AES_CTR192()) { __ bind(parmBlk_192); - generate_counterMode_push_Block(VM_Version::Cipher::_AES192_dataBlk, + resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES192_dataBlk, VM_Version::Cipher::_AES192_parmBlk_G, VM_Version::Cipher::_AES192 + mode, parmBlk, msglen, fCode, key); @@ -2130,7 +2239,7 @@ class StubGenerator: public StubCodeGenerator { if (VM_Version::has_Crypto_AES_CTR256()) { __ bind(parmBlk_256); - generate_counterMode_push_Block(VM_Version::Cipher::_AES256_dataBlk, + resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES256_dataBlk, VM_Version::Cipher::_AES256_parmBlk_G, VM_Version::Cipher::_AES256 + mode, parmBlk, msglen, fCode, key); @@ -2138,6 +2247,7 @@ class StubGenerator: public StubCodeGenerator { } __ bind(parmBlk_set); + return resize_len; } @@ -2150,39 +2260,27 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("} pop parmBlk counterMode_AESCrypt"); } - // Resize current stack frame to make room for some register data which needs - // to be spilled temporarily. All registers in the range [from..to] are spilled - // automatically. The actual length of the allocated aux block is returned. 
- // The extra spill space (if requested) is located at - // [Z_SP+stackSpace-spillSpace, Z_SP+stackSpace) - // Kills Z__R0 (contains fp afterwards) and Z_R1 (contains old SP afterwards). - // All space in the range [SP..SP+regSpace) is reserved. - // As always (here): 0(SP) - stack linkage, 8(SP) - SP before resize for easy pop. - int generate_push_aux_block(Register from, Register to, unsigned int spillSpace) { - int n_regs = to->encoding() - from->encoding() + 1; - int linkSpace = 2*wordSize; - int regSpace = n_regs*wordSize; - int stackSpace = roundup(linkSpace + regSpace + spillSpace, AES_parmBlk_align); - BLOCK_COMMENT(err_msg("push aux_block (%d bytes) counterMode_AESCrypt {", stackSpace)); - __ z_lgr(Z_R1, Z_SP); - __ resize_frame(-stackSpace, Z_R0, true); - __ z_stg(Z_R1, 8, Z_SP); - __ z_stmg(from, to, linkSpace, Z_SP); - BLOCK_COMMENT(err_msg("} push aux_block (%d bytes) counterMode_AESCrypt", stackSpace)); - return stackSpace; - } - // Reverts everything done by generate_push_aux_block(). - void generate_pop_aux_block(Register from, Register to) { - BLOCK_COMMENT("pop aux_block counterMode_AESCrypt {"); - __ z_lmg(from, to, 16, Z_SP); - __ z_lg(Z_SP, 8, Z_SP); - BLOCK_COMMENT("} pop aux_block counterMode_AESCrypt"); - } - // Implementation of counter-mode AES encrypt/decrypt function. // void generate_counterMode_AES_impl(bool is_decipher) { + // On entry: + // if there was a previous call to update(), and this previous call did not fully use + // the current encrypted counter, that counter is available at arg6_Offset(Z_SP). + // The index of the first unused byte in the encrypted counter is available at arg7_Offset(Z_SP). + // The index is in the range [1..AES_ctrVal_len] ([1..16]), where index == 16 indicates a fully + // used previous encrypted counter. + // The unencrypted counter has already been incremented and is ready to be used for the next + // data block, after the unused bytes from the previous call have been consumed.
+ // The unencrypted counter follows the "increment-after use" principle. + + // On exit: + // The index of the first unused byte of the encrypted counter is written back to arg7_Offset(Z_SP). + // A value of AES_ctrVal_len (16) indicates there is no leftover byte. + // If there is at least one leftover byte (1 <= index < AES_ctrVal_len), the encrypted counter value + // is written back to arg6_Offset(Z_SP). If there is no leftover, nothing is written back. + // The unencrypted counter value is written back after having been incremented. + Register from = Z_ARG1; // byte[], source byte array (clear text) Register to = Z_ARG2; // byte[], destination byte array (ciphered) Register key = Z_ARG3; // byte[], expanded key array. @@ -2191,78 +2289,101 @@ class StubGenerator: public StubCodeGenerator { // returned in Z_RET upon completion of this stub. // This is a jint. Negative values are illegal, but technically possible. // Do not rely on high word. Contents is undefined. + // encCtr = Z_ARG6 - encrypted counter (byte array), + // address passed on stack at _z_abi(remaining_cargs) + 0 * WordSize + // cvIndex = Z_ARG7 - # used (consumed) bytes of encrypted counter, + // passed on stack at _z_abi(remaining_cargs) + 1 * WordSize + // Caution:4-byte value, right-justified in 8-byte stack word const Register fCode = Z_R0; // crypto function code const Register parmBlk = Z_R1; // parameter block address (points to crypto key) - const Register src = Z_ARG1; // is Z_R2 + const Register src = Z_ARG1; // is Z_R2, forms even/odd pair with srclen const Register srclen = Z_ARG2; // Overwrites destination address. const Register dst = Z_ARG3; // Overwrites key address. const Register counter = Z_ARG5; // Overwrites msglen. Must have counter array in an even register. Label srcMover, dstMover, fromMover, ctrXOR, dataEraser; // EXRL (execution) templates. 
- Label CryptoLoop, CryptoLoop_doit, CryptoLoop_end, CryptoLoop_setupAndDoLast, CryptoLoop_ctrVal_inc, allDone, Exit; + Label CryptoLoop, CryptoLoop_doit, CryptoLoop_end, CryptoLoop_setupAndDoLast, CryptoLoop_ctrVal_inc; + Label allDone, allDone_noInc, popAndExit, Exit; + + int arg6_Offset = _z_abi(remaining_cargs) + 0 * HeapWordSize; + int arg7_Offset = _z_abi(remaining_cargs) + 1 * HeapWordSize; // stack slot holds ptr to int value + int oldSP_Offset = 0; + + // Is there anything to do at all? Protect against negative len as well. + __ z_ltr(msglen, msglen); + __ z_brnh(Exit); + + // Expand stack, load parm block address into parmBlk (== Z_R1), copy crypto key to parm block. + oldSP_Offset = generate_counterMode_push_parmBlk(parmBlk, msglen, fCode, key, is_decipher); + arg6_Offset += oldSP_Offset; + arg7_Offset += oldSP_Offset; // Check if there is a leftover, partially used encrypted counter from last invocation. // If so, use those leftover counter bytes first before starting the "normal" encryption. + + // We do not have access to the encrypted counter value. It is generated and used only + // internally within the previous kmctr instruction. But, at the end of call to this stub, + // the last encrypted couner is extracted by ciphering a 0x00 byte stream. The result is + // stored at the arg6 location for use with the subsequent call. + // + // The #used bytes of the encrypted counter (from a previous call) is provided via arg7. + // It is used as index into the encrypted counter to access the first byte availabla for ciphering. + // To cipher the input text, we move the number of remaining bytes in the encrypted counter from + // input to output. Then we simply XOR the output bytes with the associated encrypted counter bytes. + + Register cvIxAddr = Z_R10; // Address of index into encCtr. Preserved for use @CryptoLoop_end. + __ z_lg(cvIxAddr, arg7_Offset, Z_SP); // arg7: addr of field encCTR_index. 
+ { - Register cvIndex = Z_R10; // # unused bytes of last encrypted counter value - Register cvUnused = Z_R11; // # unused bytes of last encrypted counter value - Register encCtr = Z_R12; // encrypted counter value, points to first ununsed byte. - Label no_preLoop, preLoop_end; + Register cvUnused = Z_R11; // # unused bytes of encrypted counter value (= 16 - cvIndex) + Register encCtr = Z_R12; // encrypted counter value, points to first ununsed byte. + Register cvIndex = Z_R13; // # index of first unused byte of encrypted counter value + Label preLoop_end; - // Before pushing an aux block, check if it's necessary at all (saves some cycles). - __ z_lt(Z_R0, _z_abi(remaining_cargs) + 8 + 4, Z_R0, Z_SP); // arg7: # unused bytes in encCTR. - __ z_brnp(no_preLoop); // no unused bytes, nothing special to do. + // preLoop is necessary only if there is a partially used encrypted counter (encCtr). + // Partially used means cvIndex is in [1, dataBlk_len-1]. + // cvIndex == 0: encCtr is set up but not used at all. Should not occur. + // cvIndex == dataBlk_len: encCtr is exhausted, all bytes used. + // Using unsigned compare protects against cases where (cvIndex < 0). + __ z_clfhsi(0, cvIxAddr, AES_ctrVal_len); // check #used bytes in encCtr against ctr len. + __ z_brnl(preLoop_end); // if encCtr is fully used, skip to normal processing. + __ z_ltgf(cvIndex, 0, Z_R0, cvIxAddr); // # used bytes in encCTR. + __ z_brz(preLoop_end); // if encCtr has no used bytes, skip to normal processing. - int oldSP_Offset = generate_push_aux_block(Z_R10, Z_R12, 16); - int arg6_Offset = oldSP_Offset + _z_abi(remaining_cargs); - int arg7_Offset = oldSP_Offset + _z_abi(remaining_cargs) + 8; + __ z_lg(encCtr, arg6_Offset, Z_SP); // encrypted counter from last call to update() + __ z_agr(encCtr, cvIndex); // now points to first unused byte - __ z_ltgf(cvUnused, arg7_Offset+4, Z_R0, Z_SP); // arg7: # unused bytes in encCTR. (16-arg7) is index of first unused byte. 
- __ z_brnp(preLoop_end); // "not positive" means no unused bytes left - __ z_aghik(cvIndex, cvUnused, -16); // calculate index of first unused byte. AES_ctrVal_len undefined at this point. - __ z_brnl(preLoop_end); // NotLow(=NotNegative): unused bytes >= 16? How that? - __ z_lcgr(cvIndex, cvIndex); + __ add2reg(cvUnused, -AES_ctrVal_len, cvIndex); // calculate #unused bytes in encCtr. + __ z_lcgr(cvUnused, cvUnused); // previous checks ensure cvUnused in range [1, dataBlk_len-1] - __ z_lg(encCtr, arg6_Offset, Z_SP); // arg6: encrypted counter byte array. - __ z_agr(encCtr, cvIndex); // first unused byte of encrypted ctr. Used in ctrXOR. - - __ z_cr(cvUnused, msglen); // check if msg is long enough + __ z_lgf(msglen, msglen_offset, parmBlk); // Restore msglen (jint value) + __ z_cr(cvUnused, msglen); // check if msg can consume all unused encCtr bytes __ z_locr(cvUnused, msglen, Assembler::bcondHigh); // take the shorter length - - __ z_aghi(cvUnused, -1); // decrement # unused bytes by 1 for exrl instruction - __ z_brl(preLoop_end); // negative result means nothing to do (msglen is zero) - + __ z_aghi(cvUnused, -1); // decrement # unused bytes by 1 for exrl instruction + // preceding checks ensure cvUnused in range [1, dataBlk_len-1] __ z_exrl(cvUnused, fromMover); __ z_exrl(cvUnused, ctrXOR); - __ add2reg(cvUnused, 1, cvUnused); + __ z_aghi(cvUnused, 1); // revert decrement from above + __ z_agr(cvIndex, cvUnused); // update index into encCtr (first unused byte) + __ z_st(cvIndex, 0, cvIxAddr); // write back arg7, cvIxAddr is still valid + + // update pointers and counters to prepare for main loop __ z_agr(from, cvUnused); __ z_agr(to, cvUnused); - __ z_sr(msglen, cvUnused); - __ z_brnz(preLoop_end); // there is still work to do + __ z_sr(msglen, cvUnused); // #bytes not yet processed + __ z_sty(msglen, msglen_red_offset, parmBlk); // save for calculations in main loop + __ z_srak(Z_R0, msglen, exact_log2(AES_ctrVal_len));// # full cipher blocks that can be 
formed from input text. + __ z_sty(Z_R0, rem_msgblk_offset, parmBlk); - // Remaining msglen is zero, i.e. all msg bytes were processed in preLoop. - // Take an early exit. - generate_pop_aux_block(Z_R10, Z_R12); - __ z_bru(Exit); - - //------------------------------------------- - //---< execution templates for preLoop >--- - //------------------------------------------- - __ bind(fromMover); - __ z_mvc(0, 0, to, 0, from); // Template instruction to move input data to dst. - __ bind(ctrXOR); - __ z_xc(0, 0, to, 0, encCtr); // Template instruction to XOR input data (now in to) with encrypted counter. + // check remaining msglen. If zero, all msg bytes were processed in preLoop. + __ z_ltr(msglen, msglen); + __ z_brnh(popAndExit); __ bind(preLoop_end); - generate_pop_aux_block(Z_R10, Z_R12); - - __ bind(no_preLoop); } - // Expand stack, load parm block address into parmBlk (== Z_R1), copy crypto key to parm block. - generate_counterMode_push_parmBlk(parmBlk, msglen, fCode, key, is_decipher); // Create count vector on stack to accommodate up to AES_ctrVec_len blocks. generate_counterMode_prepare_Stack(parmBlk, ctr, counter, fCode); @@ -2273,16 +2394,17 @@ class StubGenerator: public StubCodeGenerator { __ bind(CryptoLoop); __ z_lghi(srclen, AES_ctrArea_len); // preset len (#bytes) for next iteration: max possible. - __ z_asi(remmsg_len_offset, parmBlk, -AES_ctrVec_len); // decrement #remaining blocks (16 bytes each). Range: [+127..-128] - __ z_brl(CryptoLoop_setupAndDoLast); // Handling the last iteration out-of-line + __ z_asi(rem_msgblk_offset, parmBlk, -AES_ctrVec_len); // decrement #remaining blocks (16 bytes each). Range: [+127..-128] + __ z_brl(CryptoLoop_setupAndDoLast); // Handling the last iteration (using less than max #blocks) out-of-line __ bind(CryptoLoop_doit); __ kmctr(dst, counter, src); // Cipher the message. 
- __ z_lt(srclen, remmsg_len_offset, Z_R0, parmBlk); // check if this was the last iteration + __ z_lt(srclen, rem_msgblk_offset, Z_R0, parmBlk); // check if this was the last iteration __ z_brz(CryptoLoop_ctrVal_inc); // == 0: ctrVector fully used. Need to increment the first // vector element to encrypt remaining unprocessed bytes. // __ z_brl(CryptoLoop_end); // < 0: this was detected before and handled at CryptoLoop_setupAndDoLast + // > 0: this is the fallthru case, need another iteration generate_counterMode_increment_ctrVector(parmBlk, counter, srclen, false); // srclen unused here (serves as scratch) __ z_bru(CryptoLoop); @@ -2290,50 +2412,75 @@ class StubGenerator: public StubCodeGenerator { __ bind(CryptoLoop_end); // OK, when we arrive here, we have encrypted all of the "from" byte stream - // except for the last few [0..dataBlk_len) bytes. To encrypt these few bytes - // we need to form an extra src and dst data block of dataBlk_len each. This - // is because we can only process full blocks but we must not read or write - // beyond the boundaries of the argument arrays. Here is what we do: - // - The src data block is filled with the remaining "from" bytes, padded with 0x00's. + // except for the last few [0..dataBlk_len) bytes. In addition, we know that + // there are no more unused bytes in the previously generated encrypted counter. + // The (unencrypted) counter, however, is ready to use (it was incremented before). + + // To encrypt the few remaining bytes, we need to form an extra src and dst + // data block of dataBlk_len each. This is because we can only process full + // blocks but we must not read or write beyond the boundaries of the argument + // arrays. Here is what we do: + // - The ctrVector has at least one unused element. This is ensured by CryptoLoop code. + // - The (first) unused element is pointed at by the counter register. + // - The src data block is filled with the remaining "from" bytes, remainder of block undefined. 
// - The single src data block is encrypted into the dst data block. // - The dst data block is copied into the "to" array, but only the leftmost few bytes // (as many as were left in the source byte stream). - // - The counter value to be used is is pointed at by the counter register. - // - Fortunately, the crypto instruction (kmctr) updates all related addresses such that we - // know where to continue with "from" and "to" and which counter value to use next. + // - The counter value to be used is pointed at by the counter register. + // - Fortunately, the crypto instruction (kmctr) has updated all related addresses such that + // we know where to continue with "from" and "to" and which counter value to use next. - // Use speaking alias for temp register - Register dataBlk = counter; - __ z_stg(counter, -24, parmBlk); // spill address of counter array - __ add2reg(dataBlk, -(AES_parmBlk_addspace + AES_dataBlk_space), parmBlk); + Register encCtr = Z_R12; // encrypted counter value, points to stub argument. + Register tmpDst = Z_R12; // addr of temp destination (for last partial block encryption) - __ z_lgf(srclen, msglen_offset, parmBlk); // full plaintext/ciphertext len. - __ z_nilf(srclen, AES_ctrVal_len - 1); // those rightmost bits indicate the unprocessed #bytes - __ z_braz(allDone); // no unprocessed bytes? Then we are done. + __ z_lgf(srclen, msglen_red_offset, parmBlk); // plaintext/ciphertext len after potential preLoop processing. + __ z_nilf(srclen, AES_ctrVal_len - 1); // those rightmost bits indicate the unprocessed #bytes + __ z_stg(srclen, localSpill_offset, parmBlk); // save for later reuse + __ z_mvhi(0, cvIxAddr, 16); // write back arg7 (default 16 in case of allDone). + __ z_braz(allDone_noInc); // no unprocessed bytes? Then we are done. + // This also means the last block of data processed was + // a full-sized block (AES_ctrVal_len bytes) which results + // in no leftover encrypted counter bytes. 
+ __ z_st(srclen, 0, cvIxAddr); // This will be the index of the first unused byte in the encrypted counter. + __ z_stg(counter, counter_offset, parmBlk); // save counter location for easy later restore - __ add2reg(srclen, -1); // decrement for exrl - __ z_stg(srclen, localSpill_offset, parmBlk); // save for later reuse - __ z_xc(0, AES_ctrVal_len - 1, dataBlk, 0, dataBlk); // clear src block (zero padding) - __ z_exrl(srclen, srcMover); // copy src byte stream (remaining bytes) - __ load_const_optimized(srclen, AES_ctrVal_len); // kmctr processes only complete blocks + // calculate address (on stack) for final dst and src blocks. + __ add2reg(tmpDst, AES_dataBlk_offset, parmBlk); // tmp dst (on stack) is right before tmp src - __ z_lgr(src, dataBlk); // tmp src address for kmctr - __ z_lg(counter, -24, parmBlk); // restore counter - __ z_stg(dst, -24, parmBlk); // save current dst - __ add2reg(dst, AES_ctrVal_len, src); // tmp dst is right after tmp src + // We have a residue of [1..15] unprocessed bytes, srclen holds the exact number. + // Residue == 0 was checked just above, residue == AES_ctrVal_len would be another + // full-sized block and would have been handled by CryptoLoop. - __ kmctr(dst, counter, src); // Cipher the remaining bytes. + __ add2reg(srclen, -1); // decrement for exrl + __ z_exrl(srclen, srcMover); // copy remaining bytes of src byte stream + __ load_const_optimized(srclen, AES_ctrVal_len); // kmctr processes only complete blocks + __ add2reg(src, AES_ctrVal_len, tmpDst); // tmp dst is right before tmp src - __ add2reg(dataBlk, -AES_ctrVal_len, dst); // tmp dst address - __ z_lg(dst, -24, parmBlk); // real dst address - __ z_lg(srclen, localSpill_offset, parmBlk); // reuse calc from above + __ kmctr(tmpDst, counter, src); // Cipher the remaining bytes. 
+ + __ add2reg(tmpDst, -AES_ctrVal_len, tmpDst); // restore tmp dst address + __ z_lg(srclen, localSpill_offset, parmBlk); // residual len, saved above + __ add2reg(srclen, -1); // decrement for exrl __ z_exrl(srclen, dstMover); + // Write back new encrypted counter + __ add2reg(src, AES_dataBlk_offset, parmBlk); + __ clear_mem(Address(src, RegisterOrConstant((intptr_t)0)), AES_ctrVal_len); + __ load_const_optimized(srclen, AES_ctrVal_len); // kmctr processes only complete blocks + __ z_lg(encCtr, arg6_Offset, Z_SP); // write encrypted counter to arg6 + __ z_lg(counter, counter_offset, parmBlk); // restore counter + __ kmctr(encCtr, counter, src); + + // The last used element of the counter vector contains the latest counter value that was used. + // As described above, the counter value on exit must be the one to be used next. __ bind(allDone); - __ z_llgf(srclen, msglen_offset, parmBlk); // increment unencrypted ctr by #blocks processed. - __ z_srag(srclen, srclen, exact_log2(AES_ctrVal_len)); - __ z_ag(srclen, 8, Z_R0, ctr); - __ z_stg(srclen, 8, Z_R0, ctr); + __ z_lg(counter, counter_offset, parmBlk); // restore counter + generate_increment128(counter, 0, 1, Z_R0); + + __ bind(allDone_noInc); + __ z_mvc(0, AES_ctrVal_len, ctr, 0, counter); + + __ bind(popAndExit); generate_counterMode_pop_parmBlk(parmBlk, msglen, dataEraser); __ bind(Exit); @@ -2345,30 +2492,38 @@ class StubGenerator: public StubCodeGenerator { //---< out-of-line code >--- //---------------------------- __ bind(CryptoLoop_setupAndDoLast); - __ z_lgf(srclen, remmsg_len_offset, parmBlk); // remaining #blocks in memory is < 0 + __ z_lgf(srclen, rem_msgblk_offset, parmBlk); // remaining #blocks in memory is < 0 __ z_aghi(srclen, AES_ctrVec_len); // recalculate the actually remaining #blocks __ z_sllg(srclen, srclen, exact_log2(AES_ctrVal_len)); // convert to #bytes. Counter value is same length as data block __ kmctr(dst, counter, src); // Cipher the last integral blocks of the message. 
- __ z_bru(CryptoLoop_end); + __ z_bru(CryptoLoop_end); // There is at least one unused counter vector element. + // no need to increment. __ bind(CryptoLoop_ctrVal_inc); generate_counterMode_increment_ctrVector(parmBlk, counter, srclen, true); // srclen unused here (serves as scratch) __ z_bru(CryptoLoop_end); + //------------------------------------------- + //---< execution templates for preLoop >--- + //------------------------------------------- + __ bind(fromMover); + __ z_mvc(0, 0, to, 0, from); // Template instruction to move input data to dst. + __ bind(ctrXOR); + __ z_xc(0, 0, to, 0, encCtr); // Template instruction to XOR input data (now in to) with encrypted counter. + //------------------------------- //---< execution templates >--- //------------------------------- __ bind(dataEraser); __ z_xc(0, 0, parmBlk, 0, parmBlk); // Template instruction to erase crypto key on stack. __ bind(dstMover); - __ z_mvc(0, 0, dst, 0, dataBlk); // Template instruction to move encrypted reminder from stack to dst. + __ z_mvc(0, 0, dst, 0, tmpDst); // Template instruction to move encrypted reminder from stack to dst. __ bind(srcMover); - __ z_mvc(0, 0, dataBlk, 0, src); // Template instruction to move reminder of source byte stream to stack. + __ z_mvc(AES_ctrVal_len, 0, tmpDst, 0, src); // Template instruction to move reminder of source byte stream to stack. } // Create two intrinsic variants, optimized for short and long plaintexts. - // void generate_counterMode_AES(bool is_decipher) { const Register msglen = Z_ARG5; // int, Total length of the msg to be encrypted. 
Value must be @@ -2382,9 +2537,10 @@ class StubGenerator: public StubCodeGenerator { __ z_chi(msglen, threshold); __ z_brh(AESCTR_long); + __ bind(AESCTR_short); + BLOCK_COMMENT(err_msg("counterMode_AESCrypt (text len <= %d, block size = %d) {", threshold, vec_short*16)); - __ bind(AESCTR_short); AES_ctrVec_len = vec_short; generate_counterMode_AES_impl(false); // control of generated code will not return diff --git a/test/hotspot/jtreg/compiler/codegen/aes/Test8299817.java b/test/hotspot/jtreg/compiler/codegen/aes/Test8299817.java new file mode 100644 index 00000000000..60851536eef --- /dev/null +++ b/test/hotspot/jtreg/compiler/codegen/aes/Test8299817.java @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @key randomness + * @bug 8299817 + * @summary AES-CTR cipher failure with multiple short (< 16 bytes) update calls. 
+ * @library /test/lib / + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * + * @run main/othervm -Xbatch + * -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:. + * compiler.codegen.aes.Test8299817 + */ + +package compiler.codegen.aes; + +import java.util.Arrays; +import java.util.Random; +import javax.crypto.Cipher; +import javax.crypto.spec.IvParameterSpec; +import javax.crypto.spec.SecretKeySpec; + +import compiler.whitebox.CompilerWhiteBoxTest; +import jdk.test.whitebox.code.Compiler; +import jdk.test.lib.Utils; +import jtreg.SkippedException; + +public class Test8299817 { + private static final String ALGO = "AES/CTR/NoPadding"; + private static final int LOOPS = 20000; + private static final int WARMUP_LOOPS = 10000; + private static final int LEN_INC = 5; + private static final int LEN_STEPS = 13; + private static final int LEN_MAX = LEN_INC*LEN_STEPS; + private static final int SEG_INC = 3; + private static final int SEG_MAX = 11; + private static final int SHOW_ARRAY_LIMIT = 72; + private static final boolean DEBUG_MODE = false; + + public static void main(String[] args) throws Exception { + if (!DEBUG_MODE) { + if (!Compiler.isIntrinsicAvailable(CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION, + "com.sun.crypto.provider.CounterMode", "implCrypt", + byte[].class, int.class, int.class, byte[].class, int.class) + ) { + throw new SkippedException("AES-CTR intrinsic is not available"); + } + } + + Random random = Utils.getRandomInstance(); + + // Create secret key + byte[] keyBytes = new byte[32]; + random.nextBytes(keyBytes); + SecretKeySpec key = new SecretKeySpec(keyBytes, "AES"); + + // Create initial counter + byte[] ivBytes = new byte[16]; + random.nextBytes(ivBytes); + if (DEBUG_MODE) { + for (int i = 0; i < 16; i++) { + ivBytes[i] = (byte)0; + } + ivBytes[15] = (byte)1; + } + IvParameterSpec iv = new IvParameterSpec(ivBytes); + + // Create cipher objects and 
initialize + Cipher encryptCipher = Cipher.getInstance(ALGO); + Cipher decryptCipher = Cipher.getInstance(ALGO); + + encryptCipher.init(Cipher.ENCRYPT_MODE, key, iv); + decryptCipher.init(Cipher.DECRYPT_MODE, key, iv); + + // Create plaintext, ciphertext, and encrypted counter (reference copy) + byte[] original = new byte[LEN_MAX]; + byte[] original_encrypted = new byte[LEN_MAX]; + byte[] counter_encrypted = new byte[LEN_MAX]; + // Retrieve the encrypted counter + if (DEBUG_MODE) { + for (int i = 0; i < LEN_MAX; i++) { + original[i] = (byte)0; + } + encryptCipher.doFinal(original, 0, LEN_MAX, counter_encrypted); + } + // Create the encrypted message reference (no JIT, no intrinsic involved) + if (DEBUG_MODE) { + for (int i = 0; i < LEN_MAX; i++) { + original[i] = (byte)i; + } + encryptCipher.doFinal(original, 0, LEN_MAX, original_encrypted); + } + if (DEBUG_MODE) { + showArray(original, original.length, "original: "); + showArray(original_encrypted, original_encrypted.length, "original_encrypted: "); + showArray(counter_encrypted, counter_encrypted.length, "counter_encrypted: "); + } + + // Warmup to have everything compiled + System.out.println("Warming up, " + WARMUP_LOOPS + " iterations..."); + byte[] work_encrypted = new byte[LEN_MAX]; + byte[] work_decrypted = new byte[LEN_MAX]; + byte[] varlen = new byte[LEN_MAX*2]; + + for (int i = 0; i < WARMUP_LOOPS; i++) { + boolean failed = false; + if (!DEBUG_MODE) { + random.nextBytes(original); + } + encryptCipher.doFinal(original, 0, LEN_MAX, work_encrypted); + + random.nextBytes(varlen); + for (int j = 0; j < LEN_MAX; j++) { + int len1 = (varlen[2*j] & 0x0f) + 1; + decryptCipher.update(work_encrypted, 0, len1, work_decrypted, 0); + for (int k = 0; k < len1; k++) { + if (original[k] != work_decrypted[k]) { + if (!failed) { + failed = true; + System.out.println("-------------------"); + } + System.out.println("Decrypt failure (warmup, update): LEN(" + + LEN_MAX + "), iteration (" + i + "), k = " + k); + } + } + int 
len2 = (varlen[2*j+1] & 0x0f) + 1; + decryptCipher.update(work_encrypted, len1, len2, work_decrypted, len1); + for (int k = len1; k < len1+len2; k++) { + if (original[k] != work_decrypted[k]) { + if (!failed) { + failed = true; + System.out.println("-------------------"); + } + System.out.println("Decrypt failure (warmup, update): LEN(" + + LEN_MAX + "), iteration (" + i + "), k = " + k); + } + } + decryptCipher.doFinal(work_encrypted, len1+len2, LEN_MAX-len1-len2, work_decrypted, len1+len2); + for (int k = len1+len2; k < LEN_MAX; k++) { + if (original[k] != work_decrypted[k]) { + if (!failed) { + failed = true; + System.out.println("-------------------"); + } + System.out.println("Decrypt failure (warmup, doFinal): LEN(" + + LEN_MAX + "), iteration (" + i + "), k = " + k); + } + } + } + if (!compareArrays(work_decrypted, original, false)) { + System.out.println("Warmup encrypt/decrypt failure during iteration " + i + " of LEN " + LEN_MAX); + compareArrays(work_decrypted, original, true); + showArray(work_encrypted, work_encrypted.length, "encrypted:"); + showArray(counter_encrypted, counter_encrypted.length, "ctr_enc: "); + if (!DEBUG_MODE) { + System.exit(1); + } + } + } + + System.out.println("Testing, " + LOOPS + " iterations..."); + for (int LEN = 1; LEN < LEN_MAX; LEN += LEN_INC) { + work_encrypted = new byte[LEN]; + work_decrypted = new byte[LEN]; + + for (int i = 0; i < LOOPS; i++) { + boolean failed = false; + random.nextBytes(original); + encryptCipher.doFinal(original, 0, LEN, work_encrypted); + + int ix = 0; + for (int SEG = 0; (SEG < SEG_MAX) && (ix + SEG_INC < LEN); SEG++) { + decryptCipher.update(work_encrypted, ix, SEG_INC, work_decrypted, ix); + for (int k = ix; k < ix + SEG_INC; k++) { + if (original[k] != work_decrypted[k]) { + if (!failed) { + failed = true; + System.out.println("-------------------"); + } + System.out.println("Decrypt failure (update): LEN(" + LEN + "), iteration " + + i + ", SEG(" + SEG + "), SEG_INC(" + SEG_INC + "), k = " + 
k); + } + } + ix += SEG_INC; + } + + decryptCipher.doFinal(work_encrypted, ix, LEN - ix, work_decrypted, ix); + if (!compareArrays(work_decrypted, original, false)) { + if (!failed) { + failed = true; + System.out.println("-------------------"); + } + System.out.println("While decrypting the remaining " + (LEN - ix) + + "(" + LEN + ") bytes of CT, iteration " + i); + System.out.println("Decrypt failure (doFinal): LEN(" + LEN + + "), SEG_INC(" + SEG_INC + "), SEG_MAX(" + SEG_MAX + ")"); + showArray(work_encrypted, work_encrypted.length, "encrypted:"); + compareArrays(work_decrypted, original, true); + if (!DEBUG_MODE) { + System.exit(1); + } + } + } + } + } + + static void showArray(byte b[], int len, String name) { + System.out.format("%s [%d]: ", name, b.length); + for (int i = 0; i < Math.min(len, SHOW_ARRAY_LIMIT); i++) { + System.out.format("%02x ", b[i] & 0xff); + } + System.out.println(); + } + + static boolean compareArrays(byte b[], byte exp[], boolean print) { + boolean equal = true; + int len = (b.length <= exp.length) ? b.length : exp.length; + for (int i = 0; i < len; i++) { + equal &= b[i] == exp[i]; + if (!equal) { + if (print) { + System.out.format("encrypt/decrypt error at index %d: got %02x, expected %02x\n", + i, b[i] & 0xff, exp[i] & 0xff); + showArray(b, len, "result: "); + showArray(exp, len, "expected: "); + } + return equal; + } + } + return equal; + } +}