diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 8acb8d8514b..15b8e98369e 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -604,14 +604,14 @@ public final class String } byte[] utf16 = StringUTF16.newBytesFor(length); StringLatin1.inflate(latin1, 0, utf16, 0, dp); - dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp); + dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true); if (dp != length) { utf16 = Arrays.copyOf(utf16, dp << 1); } return new String(utf16, UTF16); } else { // !COMPACT_STRINGS byte[] dst = StringUTF16.newBytesFor(length); - int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0); + int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true); if (dp != length) { dst = Arrays.copyOf(dst, dp << 1); } @@ -689,24 +689,12 @@ public final class String } /* - * {@return a new string by decoding from the given UTF-8 bytes array} - *
- * WARNING: The caller of this method is assumed to have relinquished
- * and transferred the ownership of the byte array. It can thus be
- * exclusively used to construct the {@code String}.
- *
- * @param bytes byte array containing UTF-8 encoded characters
- * @param offset the index of the first byte to decode
- * @param length the number of bytes to decode
- * @throws NullPointerException If {@code bytes} is null
- * @throws StringIndexOutOfBoundsException If {@code offset} is negative,
- * {@code length} is negative, or {@code offset} is greater than
- * {@code bytes.length - length}
- * @throws CharacterCodingException for malformed input or unmappable characters
+ * Throws IllegalArgumentException, instead of replacing, if malformed or unmappable.
+ * The byte array can be exclusively used to construct
+ * the string and is not modified or used for any other purpose.
*/
- private static String newStringUTF8OrThrow(byte[] bytes, int offset, int length)
- throws CharacterCodingException {
- checkBoundsOffCount(offset, length, bytes.length); // Implicit null check on `bytes`
+ private static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
+ checkBoundsOffCount(offset, length, bytes.length);
if (length == 0) {
return "";
}
@@ -757,10 +745,10 @@ public final class String
StringLatin1.inflate(dst, 0, buf, 0, dp);
dst = buf;
}
- dp = decodeUTF8_UTF16OrThrow(bytes, offset, sl, dst, dp);
+ dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
} else { // !COMPACT_STRINGS
dst = StringUTF16.newBytesFor(length);
- dp = decodeUTF8_UTF16OrThrow(bytes, offset, offset + length, dst, 0);
+ dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
}
if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
@@ -796,13 +784,26 @@ public final class String
*
* @throws CharacterCodingException for malformed input or unmappable characters
*/
- static String newStringOrThrow(byte[] src, Charset cs) throws CharacterCodingException {
+ static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
+ try {
+ return newStringNoRepl1(src, cs);
+ } catch (IllegalArgumentException e) {
+ //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
+ Throwable cause = e.getCause();
+ if (cause instanceof MalformedInputException mie) {
+ throw mie;
+ }
+ throw (CharacterCodingException)cause;
+ }
+ }
+
+ private static String newStringNoRepl1(byte[] src, Charset cs) {
int len = src.length;
if (len == 0) {
return "";
}
if (cs == UTF_8.INSTANCE) {
- return newStringUTF8OrThrow(src, 0, src.length);
+ return newStringUTF8NoRepl(src, 0, src.length);
}
if (cs == ISO_8859_1.INSTANCE) {
if (COMPACT_STRINGS)
@@ -815,7 +816,7 @@ public final class String
return new String(src, LATIN1);
return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
} else {
- throw malformedASCII(src);
+ throwMalformed(src);
}
}
@@ -830,7 +831,13 @@ public final class String
}
int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
- int caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
+ int caLen;
+ try {
+ caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
+ } catch (CharacterCodingException x) {
+ // throw via IAE
+ throw new IllegalArgumentException(x);
+ }
if (COMPACT_STRINGS) {
byte[] val = StringUTF16.compress(ca, 0, caLen);
byte coder = StringUTF16.coderFromArrayLen(val, caLen);
@@ -867,7 +874,7 @@ public final class String
private static byte[] encode(Charset cs, byte coder, byte[] val) {
if (cs == UTF_8.INSTANCE) {
- return encodeUTF8(coder, val);
+ return encodeUTF8(coder, val, true);
}
if (cs == ISO_8859_1.INSTANCE) {
return encode8859_1(coder, val);
@@ -875,30 +882,13 @@ public final class String
if (cs == US_ASCII.INSTANCE) {
return encodeASCII(coder, val);
}
- return encodeWithEncoder(cs, coder, val, null);
+ return encodeWithEncoder(cs, coder, val, true);
}
- /**
- * {@return the byte array obtained by first decoding {@code val} with
- * {@code coder}, and then encoding the result with the encoder of {@code
- * cs}}
- *
- * @param cs a charset to obtain the encoder from
- * @param coder a coder to decode {@code val} with
- * @param val a string byte array encoded with {@code coder}
- * @param exClass The exception class where any non-null value indicates
- * malformed or unmappable bytes will result in an exception
- * to be thrown instead of getting replaced.
- * @param
- * WARNING: This method returns the {@code byte[]} backing the provided
- * {@code String}, if the input is ASCII. Hence, the returned byte array
- * must not be modified.
- *
- * @param s the string to encode
- * @param cs the charset
- * @throws NullPointerException If {@code s} or {@code cs} is null
- * @throws CharacterCodingException For malformed input or unmappable characters
+ /*
+ * Throws CCE, instead of replacing, if unmappable.
*/
- static byte[] getBytesOrThrow(String s, Charset cs) throws CharacterCodingException {
- Objects.requireNonNull(cs);
- byte[] val = s.value(); // Implicit null check on `s`
+ static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
+ try {
+ return getBytesNoRepl1(s, cs);
+ } catch (IllegalArgumentException e) {
+ //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
+ Throwable cause = e.getCause();
+ if (cause instanceof UnmappableCharacterException) {
+ throw (UnmappableCharacterException)cause;
+ }
+ throw (CharacterCodingException)cause;
+ }
+ }
+
+ private static byte[] getBytesNoRepl1(String s, Charset cs) {
+ byte[] val = s.value();
byte coder = s.coder();
if (cs == UTF_8.INSTANCE) {
if (coder == LATIN1 && isASCII(val)) {
return val;
}
- return encodeUTF8OrThrow(coder, val);
+ return encodeUTF8(coder, val, false);
}
if (cs == ISO_8859_1.INSTANCE) {
if (coder == LATIN1) {
return val;
}
- return encode8859_1OrThrow(coder, val);
+ return encode8859_1(coder, val, false);
}
if (cs == US_ASCII.INSTANCE) {
if (coder == LATIN1) {
if (isASCII(val)) {
return val;
} else {
- throw unmappableASCII(val);
+ throwUnmappable(val);
}
}
}
- return encodeWithEncoder(cs, coder, val, CharacterCodingException.class);
+ return encodeWithEncoder(cs, coder, val, false);
}
- /**
- * {@return the byte array obtained by first decoding {@code val} with
- * {@code coder}, and then encoding the result with US-ASCII}
- *
- * @param coder a coder to decode {@code val} with
- * @param val a string byte array encoded with {@code coder}
- */
private static byte[] encodeASCII(byte coder, byte[] val) {
if (coder == LATIN1) {
int positives = StringCoding.countPositives(val, 0, val.length);
@@ -1052,26 +1031,10 @@ public final class String
}
private static byte[] encode8859_1(byte coder, byte[] val) {
- return encode8859_1(coder, val, null);
+ return encode8859_1(coder, val, true);
}
- private static byte[] encode8859_1OrThrow(byte coder, byte[] val) throws UnmappableCharacterException {
- return encode8859_1(coder, val, UnmappableCharacterException.class);
- }
-
- /**
- * {@return the byte array obtained by first decoding {@code val} with
- * {@code coder}, and then encoding the result with ISO-8859-1}
- *
- * @param coder a coder to decode {@code val} with
- * @param val a string byte array encoded with {@code coder}
- * @param exClass The exception class where any non-null value indicates
- * malformed or unmappable bytes will result in an exception
- * to be thrown instead of getting replaced.
- * @param
* WARNING: The caller of this method shall relinquish and transfer the
* ownership of the byte array to the callee, since the latter will not
@@ -342,22 +343,25 @@ public interface JavaLangAccess {
* @return the newly created string
* @throws CharacterCodingException for malformed or unmappable bytes
*/
- String uncheckedNewStringOrThrow(byte[] bytes, Charset cs) throws CharacterCodingException;
+ String uncheckedNewStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException;
/**
- * {@return the sequence of bytes obtained by encoding the given string in
- * the specified {@code Charset}}
+ * Encode the given string into a sequence of bytes using the specified
+ * {@linkplain java.nio.charset.Charset charset}.
*
* WARNING: This method returns the {@code byte[]} backing the provided
* {@code String}, if the input is ASCII. Hence, the returned byte array
* must not be modified.
+ *
+ * This method throws {@code CharacterCodingException} instead of replacing
+ * when malformed input or unmappable characters are encountered.
*
* @param s the string to encode
* @param cs the charset
- * @throws NullPointerException If {@code s} or {@code cs} is null
+ * @return the encoded bytes
* @throws CharacterCodingException for malformed input or unmappable characters
*/
- byte[] uncheckedGetBytesOrThrow(String s, Charset cs) throws CharacterCodingException;
+ byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException;
/**
* Get the {@code char} at {@code index} in a {@code byte[]} in internal
@@ -383,13 +387,13 @@ public interface JavaLangAccess {
void uncheckedPutCharUTF16(byte[] bytes, int index, int ch);
/**
- * {@return the sequence of bytes obtained by encoding the given string in UTF-8}
+ * Encode the given string into a sequence of bytes using UTF-8.
*
* @param s the string to encode
- * @throws NullPointerException If {@code s} is null
- * @throws CharacterCodingException For malformed input or unmappable characters
+ * @return the encoded bytes in UTF-8
+ * @throws IllegalArgumentException for malformed surrogates
*/
- byte[] getBytesUTF8OrThrow(String s) throws CharacterCodingException;
+ byte[] getBytesUTF8NoRepl(String s);
/**
* Inflated copy from {@code byte[]} to {@code char[]}, as defined by
diff --git a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
index 5a77bb0b935..5dfc73f57aa 100644
--- a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
+++ b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
@@ -126,7 +126,7 @@ class UnixPath implements Path {
private static byte[] encode(UnixFileSystem fs, String input) {
input = fs.normalizeNativePath(input);
try {
- return JLA.uncheckedGetBytesOrThrow(input, Util.jnuEncoding());
+ return JLA.uncheckedGetBytesNoRepl(input, Util.jnuEncoding());
} catch (CharacterCodingException cce) {
throw new InvalidPathException(input,
"Malformed input or input contains unmappable characters");
diff --git a/test/jdk/java/lang/String/OrThrowTest.java b/test/jdk/java/lang/String/NoReplTest.java
similarity index 79%
rename from test/jdk/java/lang/String/OrThrowTest.java
rename to test/jdk/java/lang/String/NoReplTest.java
index 340a190b4eb..1817a1ffe73 100644
--- a/test/jdk/java/lang/String/OrThrowTest.java
+++ b/test/jdk/java/lang/String/NoReplTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,8 +24,8 @@
/*
* @test
* @bug 8286287 8288589
- * @summary Tests for *OrThrow() shared secret methods.
- * @run testng OrThrowTest
+ * @summary Tests for *NoRepl() shared secret methods.
+ * @run testng NoReplTest
* @modules jdk.charsets
*/
@@ -39,17 +39,17 @@ import static java.nio.charset.StandardCharsets.UTF_16;
import org.testng.annotations.Test;
@Test
-public class OrThrowTest {
+public class NoReplTest {
private final static byte[] MALFORMED_UTF16 = {(byte)0x00, (byte)0x20, (byte)0x00};
private final static String MALFORMED_WINDOWS_1252 = "\u0080\u041e";
private final static Charset WINDOWS_1252 = Charset.forName("windows-1252");
/**
- * Verifies {@code uncheckedNewStringOrThrow()} throws a {@link CharacterCodingException}.
- * The method is invoked by {@code Files.readString()} method.
+ * Verifies {@code newStringNoRepl()} throws a {@code CharacterCodingException}.
+ * The method is invoked by the {@code Files.readString()} method.
*/
@Test
- public void uncheckedNewStringOrThrowTest() throws IOException {
+ public void newStringNoReplTest() throws IOException {
var f = Files.createTempFile(null, null);
try (var fos = Files.newOutputStream(f)) {
fos.write(MALFORMED_UTF16);
@@ -67,11 +67,11 @@ public class OrThrowTest {
}
/**
- * Verifies {@code uncheckedGetBytesOrThrow()} throws a {@link CharacterCodingException}.
- * The method is invoked by {@code Files.writeString()} method.
+ * Verifies {@code getBytesNoRepl()} throws a {@code CharacterCodingException}.
+ * The method is invoked by the {@code Files.writeString()} method.
*/
@Test
- public void uncheckedGetBytesOrThrowTest() throws IOException {
+ public void getBytesNoReplTest() throws IOException {
var f = Files.createTempFile(null, null);
try {
Files.writeString(f, MALFORMED_WINDOWS_1252, WINDOWS_1252);