From 855b1298d8a659d6a5af25d3f219680ac70db8a7 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Thu, 15 Jan 2026 18:05:38 +0100 Subject: [PATCH] Deduplicate with computeSizeUTF8_UTF16 --- .../share/classes/java/lang/String.java | 64 ++++++------------- 1 file changed, 19 insertions(+), 45 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index d114ea7886d..aeff8fd8ba1 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -1492,7 +1492,11 @@ public final class String // This follows the implementation of encodeUTF8 private static int encodedLengthUTF8(byte coder, byte[] val) { if (coder == UTF16) { - return encodedLengthUTF8_UTF16(val); + long length = encodedLengthUTF8_UTF16(val, null); + if (length > (long)Integer.MAX_VALUE) { + throw new IllegalStateException("Required length exceeds implementation limit"); + } + return (int) length; } int positives = StringCoding.countPositives(val, 0, val.length); if (positives == val.length) { @@ -1527,7 +1531,7 @@ public final class String int sl = val.length >> 1; // UTF-8 encoded can be as much as 3 times the string length // For very large estimate, (as in overflow of 32 bit int), precompute the exact size - long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, exClass) : sl * 3; + long allocLen = (sl * 3 < 0) ? encodedLengthUTF8_UTF16(val, exClass) : sl * 3; if (allocLen > (long)Integer.MAX_VALUE) { throw new OutOfMemoryError("Required length exceeds implementation limit"); } @@ -1581,47 +1585,6 @@ public final class String return Arrays.copyOf(dst, dp); } - // This follows the implementation of encodeUTF8_UTF16 - private static int encodedLengthUTF8_UTF16(byte[] val) { - int dp = 0; - int sp = 0; - int sl = val.length >> 1; - while (sp < sl) { - // ascii fast loop; - char c = StringUTF16.getChar(val, sp); - if (c >= '\u0080') { - break; - } - dp++; - sp++; - } - while (sp < sl) { - char c = StringUTF16.getChar(val, sp++); - if (c < 0x80) { - dp++; - } else if (c < 0x800) { - dp += 2; - } else if (Character.isSurrogate(c)) { - int uc = -1; - char c2; - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) { - uc = Character.toCodePoint(c, c2); - } - if (uc < 0) { - dp++; - } else { - dp += 4; - sp++; // 2 chars - } - } else { - // 3 bytes, 16 bits - dp += 3; - } - } - return dp; - } - /** * {@return the exact size required to UTF_8 encode this UTF16 string} * @@ -1631,11 +1594,20 @@ public final class String * @param The exception type parameter to enable callers to avoid * having to declare the exception */ - private static long computeSizeUTF8_UTF16(byte[] val, Class exClass) throws E { + private static long encodedLengthUTF8_UTF16(byte[] val, Class exClass) throws E { long dp = 0L; int sp = 0; int sl = val.length >> 1; + while (sp < sl) { + // ascii fast loop; + char c = StringUTF16.getChar(val, sp); + if (c >= '\u0080') { + break; + } + dp++; + sp++; + } while (sp < sl) { char c = StringUTF16.getChar(val, sp++); if (c < 0x80) { @@ -2136,10 +2108,12 @@ public final class String * * @implNote This method may allocate memory to compute the length for some charsets. * - * @param cs the {@link Charset} used to the compute the length + * @param cs The {@link Charset} used to the compute the length + * @throws NullPointerException If {@code cs} is {@code null} * @since 27 */ public int getBytesLength(Charset cs) { + Objects.requireNonNull(cs); if (cs == UTF_8.INSTANCE) { return encodedLengthUTF8(coder, value); } else if (cs == ISO_8859_1.INSTANCE) {