diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 48c9a4ae306..0541e706e7d 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -1073,28 +1073,6 @@ public final class String
         return Arrays.copyOf(dst, dp);
     }
 
-    // This follows the implementation of encodeASCII
-    private static int encodedLengthASCII(byte coder, byte[] val) {
-        if (coder == LATIN1) {
-            return val.length;
-        }
-        int len = val.length >> 1;
-        int dp = 0;
-        for (int i = 0; i < len; i++) {
-            char c = StringUTF16.getChar(val, i);
-            if (c < 0x80) {
-                dp++;
-                continue;
-            }
-            if (Character.isHighSurrogate(c) && i + 1 < len &&
-                    Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
-                i++;
-            }
-            dp++;
-        }
-        return dp;
-    }
-
     private static void replaceNegatives(byte[] val, int fromIndex) {
         for (int i = fromIndex; i < val.length; i++) {
             if (val[i] < 0) {
@@ -1155,8 +1133,8 @@ public final class String
         return Arrays.copyOf(dst, dp);
     }
 
-    // This follows the implementation of encode8859_1
-    private static int encodedLength8859_1(byte coder, byte[] val) {
+    // This follows the implementation of encodeASCII and encode8859_1
+    private static int encodedLengthASCIIor8859_1(byte coder, byte[] val) {
         if (coder == LATIN1) {
             return val.length;
         }
@@ -1164,13 +1142,19 @@ public final class String
         int dp = 0;
         int sp = 0;
         int sl = len;
+        while (sp < sl) {
+            char c = StringUTF16.getChar(val, sp);
+            if (c >= Character.MIN_HIGH_SURROGATE) {
+                break;
+            }
+            dp++;
+            sp++;
+        }
         while (sp < sl) {
             char c = StringUTF16.getChar(val, sp++);
-            if (c > 0x80) {
-                if (Character.isHighSurrogate(c) && sp < sl &&
-                        Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
-                    sp++;
-                }
+            if (Character.isHighSurrogate(c) && sp < sl &&
+                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
+                sp++;
             }
             dp++;
         }
@@ -2134,10 +2118,8 @@ public final class String
         Objects.requireNonNull(cs);
         if (cs == UTF_8.INSTANCE) {
             return encodedLengthUTF8(coder, value);
-        } else if (cs == ISO_8859_1.INSTANCE) {
-            return encodedLength8859_1(coder, value);
-        } else if (cs == US_ASCII.INSTANCE) {
-            return encodedLengthASCII(coder, value);
+        } else if (cs == ISO_8859_1.INSTANCE || cs == US_ASCII.INSTANCE) {
+            return encodedLengthASCIIor8859_1(coder, value);
         }
         return getBytes(cs).length;
     }