From 00c7f914c665a77842d32eb8f760dcbbbda66554 Mon Sep 17 00:00:00 2001
From: Naoto Sato
Date: Tue, 11 Jul 2023 16:10:34 +0000
Subject: [PATCH] 8310047: Add UTF-32 based Charsets into StandardCharsets

Reviewed-by: alanb, lancea, bpb, jpai, jlu
---
 .../classes/java/nio/charset/Charset.java     | 29 +++++++++++++++----
 .../java/nio/charset/StandardCharsets.java    | 21 +++++++++++++-
 .../sun/nio/cs/StandardCharsets.java.template |  3 ++
 .../charset/StandardCharsets/Standard.java    | 17 +++++++++--
 4 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/src/java.base/share/classes/java/nio/charset/Charset.java b/src/java.base/share/classes/java/nio/charset/Charset.java
index b87dfc16436..83dc81ba259 100644
--- a/src/java.base/share/classes/java/nio/charset/Charset.java
+++ b/src/java.base/share/classes/java/nio/charset/Charset.java
@@ -168,37 +168,54 @@ import java.util.TreeMap;
  * {@code UTF-16}
  * Sixteen-bit UCS Transformation Format,
  * byte order identified by an optional byte-order mark
+ * {@code UTF-32BE}
+ * Thirty-two-bit UCS Transformation Format,
+ * big-endian byte order
+ * {@code UTF-32LE}
+ * Thirty-two-bit UCS Transformation Format,
+ * little-endian byte order
+ * {@code UTF-32}
+ * Thirty-two-bit UCS Transformation Format,
+ * byte order identified by an optional byte-order mark
  *
  *
  *
  *

The {@code UTF-8} charset is specified by RFC 2279; the
  * transformation format upon which it is based is specified in
- * Amendment 2 of ISO 10646-1 and is also described in the Unicode
  * Standard.
  *
  * The {@code UTF-16} charsets are specified by RFC 2781; the
  * transformation formats upon which they are based are specified in
- * Amendment 1 of ISO 10646-1 and are also described in the Unicode
  * Standard.
  *
- * The {@code UTF-16} charsets use sixteen-bit quantities and are
+ * The {@code UTF-32} charsets are based upon transformation formats
+ * which are specified in
+ * ISO 10646-1 and are also described in the Unicode
+ * Standard.
+ *
+ * The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
+ * quantities respectively, and are
  * therefore sensitive to byte order. In these encodings the byte order of a
  * stream may be indicated by an initial byte-order mark represented by
- * the Unicode character '\uFEFF'. Byte-order marks are handled
+ * the Unicode character {@code U+FEFF}. Byte-order marks are handled
  * as follows:
  *