diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 8acb8d8514b..15b8e98369e 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -604,14 +604,14 @@ public final class String } byte[] utf16 = StringUTF16.newBytesFor(length); StringLatin1.inflate(latin1, 0, utf16, 0, dp); - dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp); + dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true); if (dp != length) { utf16 = Arrays.copyOf(utf16, dp << 1); } return new String(utf16, UTF16); } else { // !COMPACT_STRINGS byte[] dst = StringUTF16.newBytesFor(length); - int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0); + int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true); if (dp != length) { dst = Arrays.copyOf(dst, dp << 1); } @@ -689,24 +689,12 @@ public final class String } /* - * {@return a new string by decoding from the given UTF-8 bytes array} - *

- * WARNING: The caller of this method is assumed to have relinquished - * and transferred the ownership of the byte array. It can thus be - * exclusively used to construct the {@code String}. - * - * @param bytes byte array containing UTF-8 encoded characters - * @param offset the index of the first byte to decode - * @param length the number of bytes to decode - * @throws NullPointerException If {@code bytes} is null - * @throws StringIndexOutOfBoundsException If {@code offset} is negative, - * {@code length} is negative, or {@code offset} is greater than - * {@code bytes.length - length} - * @throws CharacterCodingException for malformed input or unmappable characters + * Throws iae, instead of replacing, if malformed or unmappable. + * The byte array can be exclusively used to construct + * the string and is not modified or used for any other purpose. */ - private static String newStringUTF8OrThrow(byte[] bytes, int offset, int length) - throws CharacterCodingException { - checkBoundsOffCount(offset, length, bytes.length); // Implicit null check on `bytes` + private static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) { + checkBoundsOffCount(offset, length, bytes.length); if (length == 0) { return ""; } @@ -757,10 +745,10 @@ public final class String StringLatin1.inflate(dst, 0, buf, 0, dp); dst = buf; } - dp = decodeUTF8_UTF16OrThrow(bytes, offset, sl, dst, dp); + dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false); } else { // !COMPACT_STRINGS dst = StringUTF16.newBytesFor(length); - dp = decodeUTF8_UTF16OrThrow(bytes, offset, offset + length, dst, 0); + dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false); } if (dp != length) { dst = Arrays.copyOf(dst, dp << 1); @@ -796,13 +784,26 @@ public final class String * * @throws CharacterCodingException for malformed input or unmappable characters */ - static String newStringOrThrow(byte[] src, Charset cs) throws CharacterCodingException { + static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException { + try { + return newStringNoRepl1(src, cs); + } catch (IllegalArgumentException e) { + //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause + Throwable cause = e.getCause(); + if (cause instanceof MalformedInputException mie) { + throw mie; + } + throw (CharacterCodingException)cause; + } + } + + private static String newStringNoRepl1(byte[] src, Charset cs) { int len = src.length; if (len == 0) { return ""; } if (cs == UTF_8.INSTANCE) { - return newStringUTF8OrThrow(src, 0, src.length); + return newStringUTF8NoRepl(src, 0, src.length); } if (cs == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) @@ -815,7 +816,7 @@ public final class String return new String(src, LATIN1); return new String(StringLatin1.inflate(src, 0, src.length), UTF16); } else { - throw malformedASCII(src); + throwMalformed(src); } } @@ -830,7 +831,13 @@ public final class String } int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; - int caLen = decodeWithDecoder(cd, ca, src, 0, src.length); + int caLen; + try { + caLen = decodeWithDecoder(cd, ca, src, 0, src.length); + } catch (CharacterCodingException x) { + // throw via IAE + throw new IllegalArgumentException(x); + } if (COMPACT_STRINGS) { byte[] val = StringUTF16.compress(ca, 0, caLen); byte coder = StringUTF16.coderFromArrayLen(val, caLen); @@ -867,7 +874,7 @@ public final class String private static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == UTF_8.INSTANCE) { - return encodeUTF8(coder, val); + return encodeUTF8(coder, val, true); } if (cs == ISO_8859_1.INSTANCE) { return encode8859_1(coder, val); @@ -875,30 +882,13 @@ public final class String if (cs == US_ASCII.INSTANCE) { return encodeASCII(coder, val); } - return encodeWithEncoder(cs, coder, val, null); + return encodeWithEncoder(cs, coder, val, true); } - /** - * {@return the byte array obtained by first decoding {@code val} with - * {@code coder}, and then encoding the result with the encoder of {@code - * cs}} - * - * @param cs a charset to obtain the encoder from - * @param coder a coder to decode {@code val} with - * @param val a string byte array encoded with {@code coder} - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting replaced. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - private static byte[] encodeWithEncoder( - Charset cs, byte coder, byte[] val, Class exClass) - throws E { + private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) { CharsetEncoder ce = cs.newEncoder(); int len = val.length >> coder; // assume LATIN1=0/UTF16=1; int en = scale(len, ce.maxBytesPerChar()); - boolean doReplace = exClass == null; // fastpath with ArrayEncoder implies `doReplace`. if (doReplace && ce instanceof ArrayEncoder ae) { // fastpath for ascii compatible @@ -940,9 +930,7 @@ public final class String cr.throwException(); } catch (CharacterCodingException x) { if (!doReplace) { - @SuppressWarnings("unchecked") - E cce = (E) x; - throw cce; + throw new IllegalArgumentException(x); } else { throw new Error(x); } @@ -950,69 +938,60 @@ public final class String return trimArray(ba, bb.position()); } - /** - * {@return the sequence of bytes obtained by encoding the given string in UTF-8} - * - * @param s the string to encode - * @throws NullPointerException If {@code s} is null - * @throws CharacterCodingException For malformed input or unmappable characters + /* + * Throws iae, instead of replacing, if unmappable. */ - static byte[] getBytesUTF8OrThrow(String s) throws CharacterCodingException { - return encodeUTF8OrThrow(s.coder(), s.value()); // Implicit null check on `s` + static byte[] getBytesUTF8NoRepl(String s) { + return encodeUTF8(s.coder(), s.value(), false); } private static boolean isASCII(byte[] src) { return !StringCoding.hasNegatives(src, 0, src.length); } - /** - * {@return the sequence of bytes obtained by encoding the given string in - * the specified {@code Charset}} - *

- * WARNING: This method returns the {@code byte[]} backing the provided - * {@code String}, if the input is ASCII. Hence, the returned byte array - * must not be modified. - * - * @param s the string to encode - * @param cs the charset - * @throws NullPointerException If {@code s} or {@code cs} is null - * @throws CharacterCodingException For malformed input or unmappable characters + /* + * Throws CCE, instead of replacing, if unmappable. */ - static byte[] getBytesOrThrow(String s, Charset cs) throws CharacterCodingException { - Objects.requireNonNull(cs); - byte[] val = s.value(); // Implicit null check on `s` + static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException { + try { + return getBytesNoRepl1(s, cs); + } catch (IllegalArgumentException e) { + //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause + Throwable cause = e.getCause(); + if (cause instanceof UnmappableCharacterException) { + throw (UnmappableCharacterException)cause; + } + throw (CharacterCodingException)cause; + } + } + + private static byte[] getBytesNoRepl1(String s, Charset cs) { + byte[] val = s.value(); byte coder = s.coder(); if (cs == UTF_8.INSTANCE) { if (coder == LATIN1 && isASCII(val)) { return val; } - return encodeUTF8OrThrow(coder, val); + return encodeUTF8(coder, val, false); } if (cs == ISO_8859_1.INSTANCE) { if (coder == LATIN1) { return val; } - return encode8859_1OrThrow(coder, val); + return encode8859_1(coder, val, false); } if (cs == US_ASCII.INSTANCE) { if (coder == LATIN1) { if (isASCII(val)) { return val; } else { - throw unmappableASCII(val); + throwUnmappable(val); } } } - return encodeWithEncoder(cs, coder, val, CharacterCodingException.class); + return encodeWithEncoder(cs, coder, val, false); } - /** - * {@return the byte array obtained by first decoding {@code val} with - * {@code coder}, and then encoding the result with US-ASCII} - * - * @param coder a coder to decode {@code val} with - * @param val a string byte array encoded with {@code coder} - */ private static byte[] encodeASCII(byte coder, byte[] val) { if (coder == LATIN1) { int positives = StringCoding.countPositives(val, 0, val.length); @@ -1052,26 +1031,10 @@ public final class String } private static byte[] encode8859_1(byte coder, byte[] val) { - return encode8859_1(coder, val, null); + return encode8859_1(coder, val, true); } - private static byte[] encode8859_1OrThrow(byte coder, byte[] val) throws UnmappableCharacterException { - return encode8859_1(coder, val, UnmappableCharacterException.class); - } - - /** - * {@return the byte array obtained by first decoding {@code val} with - * {@code coder}, and then encoding the result with ISO-8859-1} - * - * @param coder a coder to decode {@code val} with - * @param val a string byte array encoded with {@code coder} - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting replaced. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - private static byte[] encode8859_1(byte coder, byte[] val, Class exClass) throws E { + private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { if (coder == LATIN1) { return val.clone(); } @@ -1085,8 +1048,8 @@ public final class String sp = sp + ret; dp = dp + ret; if (ret != len) { - if (exClass != null) { - throw String.unmappableCharacterException(sp); + if (!doReplace) { + throwUnmappable(sp); } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && @@ -1180,26 +1143,7 @@ public final class String ((byte) 0x80 << 0)))); } - private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp) { - return decodeUTF8_UTF16(src, sp, sl, dst, dp, null); - } - - private static int decodeUTF8_UTF16OrThrow( - byte[] src, int sp, int sl, byte[] dst, int dp) - throws MalformedInputException { - return decodeUTF8_UTF16(src, sp, sl, dst, dp, MalformedInputException.class); - } - - /** - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting replaced. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - private static int decodeUTF8_UTF16( - byte[] src, int sp, int sl, byte[] dst, int dp, Class exClass) - throws E { + private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) { while (sp < sl) { int b1 = src[sp++]; if (b1 >= 0) { @@ -1208,8 +1152,8 @@ public final class String if (sp < sl) { int b2 = src[sp++]; if (isNotContinuation(b2)) { - if (exClass != null) { - throw String.malformedInputException(sp - 1, 1); + if (!doReplace) { + throwMalformed(sp - 1, 1); } StringUTF16.putChar(dst, dp++, REPL); sp--; @@ -1218,8 +1162,8 @@ public final class String } continue; } - if (exClass != null) { - throw String.malformedInputException(sp, 1); // underflow() + if (!doReplace) { + throwMalformed(sp, 1); // underflow() } StringUTF16.putChar(dst, dp++, REPL); break; @@ -1228,8 +1172,8 @@ public final class String int b2 = src[sp++]; int b3 = src[sp++]; if (isMalformed3(b1, b2, b3)) { - if (exClass != null) { - throw String.malformedInputException(sp - 3, 3); + if (!doReplace) { + throwMalformed(sp - 3, 3); } StringUTF16.putChar(dst, dp++, REPL); sp -= 3; @@ -1237,8 +1181,8 @@ public final class String } else { char c = decode3(b1, b2, b3); if (Character.isSurrogate(c)) { - if (exClass != null) { - throw String.malformedInputException(sp - 3, 3); + if (!doReplace) { + throwMalformed(sp - 3, 3); } StringUTF16.putChar(dst, dp++, REPL); } else { @@ -1248,14 +1192,14 @@ public final class String continue; } if (sp < sl && isMalformed3_2(b1, src[sp])) { - if (exClass != null) { - throw String.malformedInputException(sp - 1, 2); + if (!doReplace) { + throwMalformed(sp - 1, 2); } StringUTF16.putChar(dst, dp++, REPL); continue; } - if (exClass != null) { - throw String.malformedInputException(sp, 1); + if (!doReplace) { + throwMalformed(sp, 1); } StringUTF16.putChar(dst, dp++, REPL); break; @@ -1267,8 +1211,8 @@ public final class String int uc = decode4(b1, b2, b3, b4); if (isMalformed4(b2, b3, b4) || !Character.isSupplementaryCodePoint(uc)) { // shortest form check - if (exClass != null) { - throw String.malformedInputException(sp - 4, 4); + if (!doReplace) { + throwMalformed(sp - 4, 4); } StringUTF16.putChar(dst, dp++, REPL); sp -= 4; @@ -1281,14 +1225,14 @@ public final class String } b1 &= 0xff; if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) { - if (exClass != null) { - throw String.malformedInputException(sp - 1, 1); // or 2 + if (!doReplace) { + throwMalformed(sp - 1, 1); // or 2 } StringUTF16.putChar(dst, dp++, REPL); continue; } - if (exClass != null) { - throw String.malformedInputException(sp - 1, 1); + if (!doReplace) { + throwMalformed(sp - 1, 1); } sp++; StringUTF16.putChar(dst, dp++, REPL); @@ -1297,8 +1241,8 @@ public final class String } break; } else { - if (exClass != null) { - throw String.malformedInputException(sp - 1, 1); + if (!doReplace) { + throwMalformed(sp - 1, 1); } StringUTF16.putChar(dst, dp++, REPL); } @@ -1340,76 +1284,29 @@ public final class String return 3; } - /** - * {@return a new {@link MalformedInputException} for the sub-range denoted - * by specified {@code offset} and {@code length}} - * - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - @SuppressWarnings("unchecked") - private static E malformedInputException(int offset, int length) throws E { - MalformedInputException mie = new MalformedInputException(length); - String msg = "malformed input offset : " + offset + ", length : " + length; - mie.initCause(new IllegalArgumentException(msg)); - return (E) mie; + private static void throwMalformed(int off, int nb) { + String msg = "malformed input off : " + off + ", length : " + nb; + throw new IllegalArgumentException(msg, new MalformedInputException(nb)); } - /** - * {@return a new {@link MalformedInputException} for the given malformed - * ASCII string} - */ - private static MalformedInputException malformedASCII(byte[] val) throws MalformedInputException { + private static void throwMalformed(byte[] val) { int dp = StringCoding.countPositives(val, 0, val.length); - return malformedInputException(dp, 1); + throwMalformed(dp, 1); } - /** - * {@return a new {@link UnmappableCharacterException} at given {@code offset}} - * - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - @SuppressWarnings("unchecked") - private static E unmappableCharacterException(int offset) throws E { - UnmappableCharacterException uce = new UnmappableCharacterException(1); - String msg = "malformed input offset : " + offset + ", length : 1"; - uce.initCause(new IllegalArgumentException(msg, uce)); - return (E) uce; + private static void throwUnmappable(int off) { + String msg = "malformed input off : " + off + ", length : 1"; + throw new IllegalArgumentException(msg, new UnmappableCharacterException(1)); } - /** - * {@return a new {@link UnmappableCharacterException} for the given - * malformed ASCII string} - */ - private static UnmappableCharacterException unmappableASCII(byte[] val) throws UnmappableCharacterException { + private static void throwUnmappable(byte[] val) { int dp = StringCoding.countPositives(val, 0, val.length); - return unmappableCharacterException(dp); + throwUnmappable(dp); } - private static byte[] encodeUTF8(byte coder, byte[] val) { - return encodeUTF8(coder, val, null); - } - - private static byte[] encodeUTF8OrThrow(byte coder, byte[] val) throws UnmappableCharacterException { - return encodeUTF8(coder, val, UnmappableCharacterException.class); - } - - /** - * {@return the byte array obtained by first decoding {@code val} with - * {@code coder}, and then encoding the result with UTF-8} - * - * @param coder a coder to decode {@code val} with - * @param val a string byte array encoded with {@code coder} - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting replaced. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - private static byte[] encodeUTF8(byte coder, byte[] val, Class exClass) throws E { + private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { if (coder == UTF16) { - return encodeUTF8_UTF16(val, exClass); + return encodeUTF8_UTF16(val, doReplace); } int positives = StringCoding.countPositives(val, 0, val.length); @@ -1437,24 +1334,13 @@ public final class String return Arrays.copyOf(dst, dp); } - /** - * {@return the byte array obtained by first decoding {@code val} with - * UTF-16, and then encoding the result with UTF-8} - * - * @param val a string byte array encoded with UTF-16 - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting replaced. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception - */ - private static byte[] encodeUTF8_UTF16(byte[] val, Class exClass) throws E { + private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { int dp = 0; int sp = 0; int sl = val.length >> 1; // UTF-8 encoded can be as much as 3 times the string length // For very large estimate, (as in overflow of 32 bit int), precompute the exact size - long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, exClass) : sl * 3; + long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3; if (allocLen > (long)Integer.MAX_VALUE) { throw new OutOfMemoryError("Required length exceeds implementation limit"); } @@ -1483,10 +1369,10 @@ public final class String uc = Character.toCodePoint(c, c2); } if (uc < 0) { - if (exClass == null) { + if (doReplace) { dst[dp++] = '?'; } else { - throw String.unmappableCharacterException(sp - 1); + throwUnmappable(sp - 1); } } else { dst[dp++] = (byte)(0xf0 | ((uc >> 18))); @@ -1510,14 +1396,10 @@ public final class String /** * {@return the exact size required to UTF_8 encode this UTF16 string} - * - * @param exClass The exception class where any non-null value indicates - * malformed or unmappable bytes will result in an exception - * to be thrown instead of getting discarded. - * @param The exception type parameter to enable callers to avoid - * having to declare the exception + * @param val UTF16 encoded byte array + * @param doReplace true to replace unmappable characters */ - private static long computeSizeUTF8_UTF16(byte[] val, Class exClass) throws E { + private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) { long dp = 0L; int sp = 0; int sl = val.length >> 1; @@ -1536,10 +1418,10 @@ public final class String uc = Character.toCodePoint(c, c2); } if (uc < 0) { - if (exClass == null) { + if (doReplace) { dp++; } else { - throw String.unmappableCharacterException(sp - 1); + throwUnmappable(sp - 1); } } else { dp += 4; diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index bb1775fbc6b..a40c27bbf47 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2124,7 +2124,6 @@ public final class System { public int countPositives(byte[] bytes, int offset, int length) { return StringCoding.countPositives(bytes, offset, length); } - public int countNonZeroAscii(String s) { return StringCoding.countNonZeroAscii(s); } @@ -2133,24 +2132,21 @@ public final class System { return String.newStringWithLatin1Bytes(bytes); } - public String uncheckedNewStringOrThrow(byte[] bytes, Charset cs) throws CharacterCodingException { - return String.newStringOrThrow(bytes, cs); + public String uncheckedNewStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException { + return String.newStringNoRepl(bytes, cs); } - public char uncheckedGetUTF16Char(byte[] bytes, int index) { return StringUTF16.getChar(bytes, index); } - public void uncheckedPutCharUTF16(byte[] bytes, int index, int ch) { StringUTF16.putChar(bytes, index, ch); } - - public byte[] uncheckedGetBytesOrThrow(String s, Charset cs) throws CharacterCodingException { - return String.getBytesOrThrow(s, cs); + public byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException { + return String.getBytesNoRepl(s, cs); } - public byte[] getBytesUTF8OrThrow(String s) throws CharacterCodingException { - return String.getBytesUTF8OrThrow(s); + public byte[] getBytesUTF8NoRepl(String s) { + return String.getBytesUTF8NoRepl(s); } public void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len) { diff --git a/src/java.base/share/classes/java/nio/file/Files.java b/src/java.base/share/classes/java/nio/file/Files.java index 80c771f5306..f8278fa2642 100644 --- a/src/java.base/share/classes/java/nio/file/Files.java +++ b/src/java.base/share/classes/java/nio/file/Files.java @@ -3043,7 +3043,7 @@ public final class Files { byte[] ba = readAllBytes(path); if (path.getClass().getModule() != Object.class.getModule()) ba = ba.clone(); - return JLA.uncheckedNewStringOrThrow(ba, cs); + return JLA.uncheckedNewStringNoRepl(ba, cs); } /** @@ -3362,7 +3362,7 @@ public final class Files { Objects.requireNonNull(csq); Objects.requireNonNull(cs); - byte[] bytes = JLA.uncheckedGetBytesOrThrow(String.valueOf(csq), cs); + byte[] bytes = JLA.uncheckedGetBytesNoRepl(String.valueOf(csq), cs); if (path.getClass().getModule() != Object.class.getModule()) bytes = bytes.clone(); write(path, bytes, options); diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index b9906d348e3..8b812eba202 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -256,7 +256,7 @@ class ZipCoder { try { // Copy subrange for exclusive use by the string being created byte[] bytes = Arrays.copyOfRange(ba, off, off + length); - return JLA.uncheckedNewStringOrThrow(bytes, StandardCharsets.UTF_8); + return JLA.uncheckedNewStringNoRepl(bytes, StandardCharsets.UTF_8); } catch (CharacterCodingException cce) { throw new IllegalArgumentException(cce); } @@ -264,11 +264,7 @@ class ZipCoder { @Override byte[] getBytes(String s) { - try { - return JLA.getBytesUTF8OrThrow(s); - } catch (CharacterCodingException cce) { - throw new IllegalArgumentException(cce); - } + return JLA.getBytesUTF8NoRepl(s); } @Override @@ -282,6 +278,8 @@ class ZipCoder { // Non-ASCII, fall back to decoding a String // We avoid using decoder() here since the UTF8ZipCoder is // shared and that decoder is not thread safe. + // We use the JLA.newStringUTF8NoRepl variant to throw + // exceptions eagerly when opening ZipFiles return hash(toString(a, off, len)); } int h = ArraysSupport.hashCodeOfUnsigned(a, off, len, 0); @@ -298,7 +296,7 @@ class ZipCoder { @Override byte compare(String str, byte[] b, int off, int len, boolean matchDirectory) { try { - byte[] encoded = JLA.uncheckedGetBytesOrThrow(str, UTF_8.INSTANCE); + byte[] encoded = JLA.uncheckedGetBytesNoRepl(str, UTF_8.INSTANCE); int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len); if (mismatch == -1) { return EXACT_MATCH; diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index c7d7c86b932..aa5b6e438f5 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -45,6 +45,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executor; import java.util.concurrent.RejectedExecutionException; +import java.util.function.BiFunction; import java.util.stream.Stream; import jdk.internal.loader.NativeLibraries; @@ -331,7 +332,7 @@ public interface JavaLangAccess { /** * Constructs a new {@code String} by decoding the specified byte array - * using the specified {@code Charset}. + * using the specified {@linkplain java.nio.charset.Charset charset}. *

* WARNING: The caller of this method shall relinquish and transfer the * ownership of the byte array to the callee, since the latter will not @@ -342,22 +343,25 @@ public interface JavaLangAccess { * @return the newly created string * @throws CharacterCodingException for malformed or unmappable bytes */ - String uncheckedNewStringOrThrow(byte[] bytes, Charset cs) throws CharacterCodingException; + String uncheckedNewStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException; /** - * {@return the sequence of bytes obtained by encoding the given string in - * the specified {@code Charset}} + * Encode the given string into a sequence of bytes using the specified + * {@linkplain java.nio.charset.Charset charset}. *

* WARNING: This method returns the {@code byte[]} backing the provided * {@code String}, if the input is ASCII. Hence, the returned byte array * must not be modified. + *

+ * This method throws {@code CharacterCodingException} instead of replacing + * when malformed input or unmappable characters are encountered. * * @param s the string to encode * @param cs the charset - * @throws NullPointerException If {@code s} or {@code cs} is null + * @return the encoded bytes * @throws CharacterCodingException for malformed input or unmappable characters */ - byte[] uncheckedGetBytesOrThrow(String s, Charset cs) throws CharacterCodingException; + byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException; /** * Get the {@code char} at {@code index} in a {@code byte[]} in internal @@ -383,13 +387,13 @@ public interface JavaLangAccess { void uncheckedPutCharUTF16(byte[] bytes, int index, int ch); /** - * {@return the sequence of bytes obtained by encoding the given string in UTF-8} + * Encode the given string into a sequence of bytes using utf8. * * @param s the string to encode - * @throws NullPointerException If {@code s} is null - * @throws CharacterCodingException For malformed input or unmappable characters + * @return the encoded bytes in utf8 + * @throws IllegalArgumentException for malformed surrogates */ - byte[] getBytesUTF8OrThrow(String s) throws CharacterCodingException; + byte[] getBytesUTF8NoRepl(String s); /** * Inflated copy from {@code byte[]} to {@code char[]}, as defined by diff --git a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java index 5a77bb0b935..5dfc73f57aa 100644 --- a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java +++ b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java @@ -126,7 +126,7 @@ class UnixPath implements Path { private static byte[] encode(UnixFileSystem fs, String input) { input = fs.normalizeNativePath(input); try { - return JLA.uncheckedGetBytesOrThrow(input, Util.jnuEncoding()); + return JLA.uncheckedGetBytesNoRepl(input, Util.jnuEncoding()); } catch (CharacterCodingException cce) { throw new InvalidPathException(input, "Malformed input or input contains unmappable characters"); diff --git a/test/jdk/java/lang/String/OrThrowTest.java b/test/jdk/java/lang/String/NoReplTest.java similarity index 79% rename from test/jdk/java/lang/String/OrThrowTest.java rename to test/jdk/java/lang/String/NoReplTest.java index 340a190b4eb..1817a1ffe73 100644 --- a/test/jdk/java/lang/String/OrThrowTest.java +++ b/test/jdk/java/lang/String/NoReplTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,8 +24,8 @@ /* * @test * @bug 8286287 8288589 - * @summary Tests for *OrThrow() shared secret methods. - * @run testng OrThrowTest + * @summary Tests for *NoRepl() shared secret methods. + * @run testng NoReplTest * @modules jdk.charsets */ @@ -39,17 +39,17 @@ import static java.nio.charset.StandardCharsets.UTF_16; import org.testng.annotations.Test; @Test -public class OrThrowTest { +public class NoReplTest { private final static byte[] MALFORMED_UTF16 = {(byte)0x00, (byte)0x20, (byte)0x00}; private final static String MALFORMED_WINDOWS_1252 = "\u0080\u041e"; private final static Charset WINDOWS_1252 = Charset.forName("windows-1252"); /** - * Verifies {@code uncheckedNewStringOrThrow()} throws a {@link CharacterCodingException}. - * The method is invoked by {@code Files.readString()} method. + * Verifies newStringNoRepl() throws a CharacterCodingException. + * The method is invoked by `Files.readString()` method. */ @Test - public void uncheckedNewStringOrThrowTest() throws IOException { + public void newStringNoReplTest() throws IOException { var f = Files.createTempFile(null, null); try (var fos = Files.newOutputStream(f)) { fos.write(MALFORMED_UTF16); @@ -67,11 +67,11 @@ public class OrThrowTest { } /** - * Verifies {@code uncheckedGetBytesOrThrow()} throws a {@link CharacterCodingException}. - * The method is invoked by {@code Files.writeString()} method. + * Verifies getBytesNoRepl() throws a CharacterCodingException. + * The method is invoked by `Files.writeString()` method. */ @Test - public void uncheckedGetBytesOrThrowTest() throws IOException { + public void getBytesNoReplTest() throws IOException { var f = Files.createTempFile(null, null); try { Files.writeString(f, MALFORMED_WINDOWS_1252, WINDOWS_1252);