diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 68960118076..4adf317cfca 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -1968,6 +1968,22 @@ void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Registe
   orr(dst, dst, tmp);
 }
 
+// rotate left with shift bits, 32-bit version
+void MacroAssembler::rolw_imm(Register dst, Register src, uint32_t shift, Register tmp) {
+  if (UseZbb) {
+    // no roliw available
+    roriw(dst, src, 32 - shift);
+    return;
+  }
+
+  assert_different_registers(dst, tmp);
+  assert_different_registers(src, tmp);
+  assert(shift < 32, "shift amount must be < 32");
+  srliw(tmp, src, 32 - shift);
+  slliw(dst, src, shift);
+  orr(dst, dst, tmp);
+}
+
 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
   if (is_simm12(imm)) {
     and_imm12(Rd, Rn, imm);
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 2a34823c77a..2344aad3a1b 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -596,7 +596,9 @@ class MacroAssembler: public Assembler {
   void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
     assert_cond(dest != nullptr);                                                                        \
     int64_t offset = dest - pc();                                                                        \
-    guarantee(is_simm13(offset) && ((offset % 2) == 0), "offset is invalid.");                           \
+    guarantee(is_simm13(offset) && is_even(offset),                                                      \
+              "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
+              BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
     Assembler::NAME(Rs1, Rs2, offset);                                                                   \
   }                                                                                                      \
   INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
@@ -772,6 +774,7 @@ public:
   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
 
   void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
+  void rolw_imm(Register dst, Register src, uint32_t, Register tmp = t0);
   void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
 
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 304e0a9c222..41bbb1bdcfd 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -3913,6 +3913,373 @@ class StubGenerator: public StubCodeGenerator {
     return start;
   }
 
+  // Set of L registers that correspond to a contiguous memory area.
+  // Each 64-bit register typically corresponds to 2 32-bit integers.
+  template <uint L>
+  class RegCache {
+  private:
+    MacroAssembler *_masm;
+    Register _regs[L];
+
+  public:
+    RegCache(MacroAssembler *masm, RegSet rs): _masm(masm) {
+      assert(rs.size() == L, "%u registers are used to cache %u 4-byte data", rs.size(), 2 * L);
+      auto it = rs.begin();
+      for (auto &r: _regs) {
+        r = *it;
+        ++it;
+      }
+    }
+
+    void gen_loads(Register base) {
+      for (uint i = 0; i < L; i += 1) {
+        __ ld(_regs[i], Address(base, 8 * i));
+      }
+    }
+
+    // Generate code extracting i-th unsigned word (4 bytes).
+    void get_u32(Register dest, uint i, Register rmask32) {
+      assert(i < 2 * L, "invalid i: %u", i);
+
+      if (i % 2 == 0) {
+        __ andr(dest, _regs[i / 2], rmask32);
+      } else {
+        __ srli(dest, _regs[i / 2], 32);
+      }
+    }
+  };
+
+  typedef RegCache<8> BufRegCache;
+
+  // a += rtmp1 + x + ac;
+  // a = Integer.rotateLeft(a, s) + b;
+  void m5_FF_GG_HH_II_epilogue(BufRegCache& reg_cache,
+                               Register a, Register b, Register c, Register d,
+                               int k, int s, int t,
+                               Register rtmp1, Register rtmp2, Register rmask32) {
+    // rtmp1 = rtmp1 + x + ac
+    reg_cache.get_u32(rtmp2, k, rmask32);
+    __ addw(rtmp1, rtmp1, rtmp2);
+    __ li(rtmp2, t);
+    __ addw(rtmp1, rtmp1, rtmp2);
+
+    // a += rtmp1 + x + ac
+    __ addw(a, a, rtmp1);
+
+    // a = Integer.rotateLeft(a, s) + b;
+    __ rolw_imm(a, a, s, rtmp1);
+    __ addw(a, a, b);
+  }
+
+  // a += ((b & c) | ((~b) & d)) + x + ac;
+  // a = Integer.rotateLeft(a, s) + b;
+  void md5_FF(BufRegCache& reg_cache,
+              Register a, Register b, Register c, Register d,
+              int k, int s, int t,
+              Register rtmp1, Register rtmp2, Register rmask32) {
+    // rtmp1 = b & c
+    __ andr(rtmp1, b, c);
+
+    // rtmp2 = (~b) & d
+    __ notr(rtmp2, b);
+    __ andr(rtmp2, rtmp2, d);
+
+    // rtmp1 = (b & c) | ((~b) & d)
+    __ orr(rtmp1, rtmp1, rtmp2);
+
+    m5_FF_GG_HH_II_epilogue(reg_cache, a, b, c, d, k, s, t,
+                            rtmp1, rtmp2, rmask32);
+  }
+
+  // a += ((b & d) | (c & (~d))) + x + ac;
+  // a = Integer.rotateLeft(a, s) + b;
+  void md5_GG(BufRegCache& reg_cache,
+              Register a, Register b, Register c, Register d,
+              int k, int s, int t,
+              Register rtmp1, Register rtmp2, Register rmask32) {
+    // rtmp1 = b & d
+    __ andr(rtmp1, b, d);
+
+    // rtmp2 = (c & (~d))
+    __ notr(rtmp2, d);
+    __ andr(rtmp2, rtmp2, c);
+
+    // rtmp1 = (b & d) | (c & (~d))
+    __ orr(rtmp1, rtmp1, rtmp2);
+
+    m5_FF_GG_HH_II_epilogue(reg_cache, a, b, c, d, k, s, t,
+                            rtmp1, rtmp2, rmask32);
+  }
+
+  // a += ((b ^ c) ^ d) + x + ac;
+  // a = Integer.rotateLeft(a, s) + b;
+  void md5_HH(BufRegCache& reg_cache,
+              Register a, Register b, Register c, Register d,
+              int k, int s, int t,
+              Register rtmp1, Register rtmp2, Register rmask32) {
+    // rtmp1 = (b ^ c) ^ d
+    __ xorr(rtmp1, b, c);
+    __ xorr(rtmp1, rtmp1, d);
+
+    m5_FF_GG_HH_II_epilogue(reg_cache, a, b, c, d, k, s, t,
+                            rtmp1, rtmp2, rmask32);
+  }
+
+  // a += (c ^ (b | (~d))) + x + ac;
+  // a = Integer.rotateLeft(a, s) + b;
+  void md5_II(BufRegCache& reg_cache,
+              Register a, Register b, Register c, Register d,
+              int k, int s, int t,
+              Register rtmp1, Register rtmp2, Register rmask32) {
+    // rtmp1 = c ^ (b | (~d))
+    __ notr(rtmp2, d);
+    __ orr(rtmp1, b, rtmp2);
+    __ xorr(rtmp1, c, rtmp1);
+
+    m5_FF_GG_HH_II_epilogue(reg_cache, a, b, c, d, k, s, t,
+                            rtmp1, rtmp2, rmask32);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - byte[]  source+offset
+  //   c_rarg1   - int[]   SHA.state
+  //   c_rarg2   - int     offset  (multi_block == True)
+  //   c_rarg3   - int     limit   (multi_block == True)
+  //
+  // Registers:
+  //    x0   zero  (zero)
+  //    x1     ra  (return address)
+  //    x2     sp  (stack pointer)
+  //    x3     gp  (global pointer)
+  //    x4     tp  (thread pointer)
+  //    x5     t0  state0
+  //    x6     t1  state1
+  //    x7     t2  state2
+  //    x8  f0/s0  (frame pointer)
+  //    x9     s1  state3  [saved-reg]
+  //   x10     a0  rtmp1 / c_rarg0
+  //   x11     a1  rtmp2 / c_rarg1
+  //   x12     a2  a     / c_rarg2
+  //   x13     a3  b     / c_rarg3
+  //   x14     a4  c
+  //   x15     a5  d
+  //   x16     a6  buf
+  //   x17     a7  state
+  //   x18     s2  ofs     [saved-reg]  (multi_block == True)
+  //   x19     s3  limit   [saved-reg]  (multi_block == True)
+  //   x20     s4
+  //   x21     s5
+  //   x22     s6  mask32  [saved-reg]
+  //   x23     s7
+  //   x24     s8  buf0    [saved-reg]
+  //   x25     s9  buf1    [saved-reg]
+  //   x26    s10  buf2    [saved-reg]
+  //   x27    s11  buf3    [saved-reg]
+  //   x28     t3  buf4
+  //   x29     t4  buf5
+  //   x30     t5  buf6
+  //   x31     t6  buf7
+  address generate_md5_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    // rotation constants
+    const int S11 = 7;
+    const int S12 = 12;
+    const int S13 = 17;
+    const int S14 = 22;
+    const int S21 = 5;
+    const int S22 = 9;
+    const int S23 = 14;
+    const int S24 = 20;
+    const int S31 = 4;
+    const int S32 = 11;
+    const int S33 = 16;
+    const int S34 = 23;
+    const int S41 = 6;
+    const int S42 = 10;
+    const int S43 = 15;
+    const int S44 = 21;
+
+    Register buf_arg   = c_rarg0; // a0
+    Register state_arg = c_rarg1; // a1
+    Register ofs_arg   = c_rarg2; // a2
+    Register limit_arg = c_rarg3; // a3
+
+    // we'll copy the args to these registers to free up a0-a3
+    // to use for other values manipulated by instructions
+    // that can be compressed
+    Register buf       = x16; // a6
+    Register state     = x17; // a7
+    Register ofs       = x18; // s2
+    Register limit     = x19; // s3
+
+    // using x12->15 to allow compressed instructions
+    Register a         = x12; // a2
+    Register b         = x13; // a3
+    Register c         = x14; // a4
+    Register d         = x15; // a5
+
+    Register state0    =  x5; // t0
+    Register state1    =  x6; // t1
+    Register state2    =  x7; // t2
+    Register state3    =  x9; // s1
+
+    // using x9->x11 to allow compressed instructions
+    Register rtmp1     = x10; // a0
+    Register rtmp2     = x11; // a1
+
+    const int64_t MASK_32 = 0xffffffff;
+    Register rmask32   = x22; // s6
+
+    RegSet reg_cache_saved_regs = RegSet::of(x24, x25, x26, x27); // s8, s9, s10, s11
+    RegSet reg_cache_regs;
+    reg_cache_regs += reg_cache_saved_regs;
+    reg_cache_regs += RegSet::of(x28, x29, x30, x31); // t3, t4, t5, t6
+    BufRegCache reg_cache(_masm, reg_cache_regs);
+
+    RegSet saved_regs;
+    if (multi_block) {
+      saved_regs += RegSet::of(ofs, limit);
+    }
+    saved_regs += RegSet::of(state3, rmask32);
+    saved_regs += reg_cache_saved_regs;
+
+    __ push_reg(saved_regs, sp);
+
+    __ mv(buf, buf_arg);
+    __ mv(state, state_arg);
+    if (multi_block) {
+      __ mv(ofs, ofs_arg);
+      __ mv(limit, limit_arg);
+    }
+    __ li(rmask32, MASK_32);
+
+    // to minimize the number of memory operations:
+    // read the 4 state 4-byte values in pairs, with a single ld,
+    // and split them into 2 registers
+    __ ld(state0, Address(state));
+    __ srli(state1, state0, 32);
+    __ andr(state0, state0, rmask32);
+    __ ld(state2, Address(state, 8));
+    __ srli(state3, state2, 32);
+    __ andr(state2, state2, rmask32);
+
+    Label md5_loop;
+    __ BIND(md5_loop);
+
+    reg_cache.gen_loads(buf);
+
+    __ mv(a, state0);
+    __ mv(b, state1);
+    __ mv(c, state2);
+    __ mv(d, state3);
+
+    // Round 1
+    md5_FF(reg_cache, a, b, c, d,  0, S11, 0xd76aa478, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, d, a, b, c,  1, S12, 0xe8c7b756, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, c, d, a, b,  2, S13, 0x242070db, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, b, c, d, a,  3, S14, 0xc1bdceee, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, a, b, c, d,  4, S11, 0xf57c0faf, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, d, a, b, c,  5, S12, 0x4787c62a, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, c, d, a, b,  6, S13, 0xa8304613, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, b, c, d, a,  7, S14, 0xfd469501, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, a, b, c, d,  8, S11, 0x698098d8, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, d, a, b, c,  9, S12, 0x8b44f7af, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, c, d, a, b, 10, S13, 0xffff5bb1, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, b, c, d, a, 11, S14, 0x895cd7be, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, a, b, c, d, 12, S11, 0x6b901122, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, d, a, b, c, 13, S12, 0xfd987193, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, c, d, a, b, 14, S13, 0xa679438e, rtmp1, rtmp2, rmask32);
+    md5_FF(reg_cache, b, c, d, a, 15, S14, 0x49b40821, rtmp1, rtmp2, rmask32);
+
+    // Round 2
+    md5_GG(reg_cache, a, b, c, d,  1, S21, 0xf61e2562, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, d, a, b, c,  6, S22, 0xc040b340, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, c, d, a, b, 11, S23, 0x265e5a51, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, b, c, d, a,  0, S24, 0xe9b6c7aa, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, a, b, c, d,  5, S21, 0xd62f105d, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, d, a, b, c, 10, S22, 0x02441453, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, c, d, a, b, 15, S23, 0xd8a1e681, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, b, c, d, a,  4, S24, 0xe7d3fbc8, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, a, b, c, d,  9, S21, 0x21e1cde6, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, d, a, b, c, 14, S22, 0xc33707d6, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, c, d, a, b,  3, S23, 0xf4d50d87, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, b, c, d, a,  8, S24, 0x455a14ed, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, a, b, c, d, 13, S21, 0xa9e3e905, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, d, a, b, c,  2, S22, 0xfcefa3f8, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, c, d, a, b,  7, S23, 0x676f02d9, rtmp1, rtmp2, rmask32);
+    md5_GG(reg_cache, b, c, d, a, 12, S24, 0x8d2a4c8a, rtmp1, rtmp2, rmask32);
+
+    // Round 3
+    md5_HH(reg_cache, a, b, c, d,  5, S31, 0xfffa3942, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, d, a, b, c,  8, S32, 0x8771f681, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, c, d, a, b, 11, S33, 0x6d9d6122, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, b, c, d, a, 14, S34, 0xfde5380c, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, a, b, c, d,  1, S31, 0xa4beea44, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, d, a, b, c,  4, S32, 0x4bdecfa9, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, c, d, a, b,  7, S33, 0xf6bb4b60, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, b, c, d, a, 10, S34, 0xbebfbc70, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, a, b, c, d, 13, S31, 0x289b7ec6, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, d, a, b, c,  0, S32, 0xeaa127fa, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, c, d, a, b,  3, S33, 0xd4ef3085, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, b, c, d, a,  6, S34, 0x04881d05, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, a, b, c, d,  9, S31, 0xd9d4d039, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, d, a, b, c, 12, S32, 0xe6db99e5, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, c, d, a, b, 15, S33, 0x1fa27cf8, rtmp1, rtmp2, rmask32);
+    md5_HH(reg_cache, b, c, d, a,  2, S34, 0xc4ac5665, rtmp1, rtmp2, rmask32);
+
+    // Round 4
+    md5_II(reg_cache, a, b, c, d,  0, S41, 0xf4292244, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, d, a, b, c,  7, S42, 0x432aff97, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, c, d, a, b, 14, S43, 0xab9423a7, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, b, c, d, a,  5, S44, 0xfc93a039, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, a, b, c, d, 12, S41, 0x655b59c3, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, d, a, b, c,  3, S42, 0x8f0ccc92, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, c, d, a, b, 10, S43, 0xffeff47d, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, b, c, d, a,  1, S44, 0x85845dd1, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, a, b, c, d,  8, S41, 0x6fa87e4f, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, d, a, b, c, 15, S42, 0xfe2ce6e0, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, c, d, a, b,  6, S43, 0xa3014314, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, b, c, d, a, 13, S44, 0x4e0811a1, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, a, b, c, d,  4, S41, 0xf7537e82, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, d, a, b, c, 11, S42, 0xbd3af235, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, c, d, a, b,  2, S43, 0x2ad7d2bb, rtmp1, rtmp2, rmask32);
+    md5_II(reg_cache, b, c, d, a,  9, S44, 0xeb86d391, rtmp1, rtmp2, rmask32);
+
+    __ addw(state0, state0, a);
+    __ addw(state1, state1, b);
+    __ addw(state2, state2, c);
+    __ addw(state3, state3, d);
+
+    if (multi_block) {
+      __ addi(buf, buf, 64);
+      __ addi(ofs, ofs, 64);
+      // if (ofs <= limit) goto m5_loop
+      __ bge(limit, ofs, md5_loop);
+      __ mv(c_rarg0, ofs); // return ofs
+    }
+
+    // to minimize the number of memory operations:
+    // write back the 4 state 4-byte values in pairs, with a single sd
+    __ andr(state0, state0, rmask32);
+    __ slli(state1, state1, 32);
+    __ orr(state0, state0, state1);
+    __ sd(state0, Address(state));
+    __ andr(state2, state2, rmask32);
+    __ slli(state3, state3, 32);
+    __ orr(state2, state2, state3);
+    __ sd(state2, Address(state, 8));
+
+    __ pop_reg(saved_regs, sp);
+    __ ret();
+
+    return (address) start;
+  }
+
 #if INCLUDE_JFR
 
   static void jfr_prologue(address the_pc, MacroAssembler* _masm, Register thread) {
@@ -4127,6 +4494,11 @@ class StubGenerator: public StubCodeGenerator {
     generate_compare_long_strings();
 
     generate_string_indexof_stubs();
+
+    if (UseMD5Intrinsics) {
+      StubRoutines::_md5_implCompress   = generate_md5_implCompress(false, "md5_implCompress");
+      StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true,  "md5_implCompressMB");
+    }
 #endif // COMPILER2_OR_JVMCI
   }
 
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
index d9b46131d66..83f6f38d253 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -171,9 +171,8 @@ void VM_Version::initialize() {
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
-  if (UseMD5Intrinsics) {
-    warning("MD5 intrinsics are not available on this CPU.");
-    FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
+  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
+    FLAG_SET_DEFAULT(UseMD5Intrinsics, true);
   }
 
   if (UseRVV) {