From 992a8ef46bc0a06c70fd5f4f307dbd20e402ed33 Mon Sep 17 00:00:00 2001 From: Daniel Gredler Date: Tue, 27 Jan 2026 13:20:26 +0000 Subject: [PATCH] 8376226: CharsetEncoder.canEncode(CharSequence) is much slower than necessary Reviewed-by: alanb, naoto --- .../nio/charset/Charset-X-Coder.java.template | 18 +- .../share/classes/sun/nio/cs/DoubleByte.java | 12 +- .../share/classes/sun/nio/cs/ISO_8859_1.java | 12 +- .../share/classes/sun/nio/cs/SingleByte.java | 12 +- .../share/classes/sun/nio/cs/US_ASCII.java | 12 +- .../bench/java/nio/CharsetCanEncode.java | 187 ++++++++++++++++++ 6 files changed, 245 insertions(+), 8 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java diff --git a/src/java.base/share/classes/java/nio/charset/Charset-X-Coder.java.template b/src/java.base/share/classes/java/nio/charset/Charset-X-Coder.java.template index e900c2eca0f..aca987ed678 100644 --- a/src/java.base/share/classes/java/nio/charset/Charset-X-Coder.java.template +++ b/src/java.base/share/classes/java/nio/charset/Charset-X-Coder.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -819,6 +819,12 @@ public abstract class Charset$Coder$ { */ public final $Otype$Buffer $code$($Itype$Buffer in) throws CharacterCodingException + { + return $code$(in, true); + } + + private $Otype$Buffer $code$($Itype$Buffer in, boolean throwOnError) + throws CharacterCodingException { int n = Math.min((int)(in.remaining() * average$ItypesPerOtype$()), ArraysSupport.SOFT_MAX_ARRAY_LENGTH); @@ -844,7 +850,11 @@ public abstract class Charset$Coder$ { out = o; continue; } - cr.throwException(); + if (throwOnError) { + cr.throwException(); + } else { + return null; + } } out.flip(); return out; @@ -938,7 +948,8 @@ public abstract class Charset$Coder$ { try { onMalformedInput(CodingErrorAction.REPORT); onUnmappableCharacter(CodingErrorAction.REPORT); - encode(cb); + ByteBuffer bb = encode(cb, false); + return bb != null; } catch (CharacterCodingException x) { return false; } finally { @@ -946,7 +957,6 @@ public abstract class Charset$Coder$ { onUnmappableCharacter(ua); reset(); } - return true; } /** diff --git a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java index 2a4dbdc95ed..0aaae14bbf9 100644 --- a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -583,6 +583,16 @@ public class DoubleByte { return encodeChar(c) != UNMAPPABLE_ENCODING; } + public boolean canEncode(CharSequence cs) { + int length = cs.length(); + for (int i = 0; i < length; i++) { + if (!canEncode(cs.charAt(i))) { + return false; + } + } + return true; + } + protected Surrogate.Parser sgp() { if (sgp == null) sgp = new Surrogate.Parser(); diff --git a/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java b/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java index 39215bfa93d..9240ac3f380 100644 --- a/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java +++ b/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -136,6 +136,16 @@ public class ISO_8859_1 return c <= '\u00FF'; } + public boolean canEncode(CharSequence cs) { + int length = cs.length(); + for (int i = 0; i < length; i++) { + if (!canEncode(cs.charAt(i))) { + return false; + } + } + return true; + } + public boolean isLegalReplacement(byte[] repl) { return true; // we accept any byte value } diff --git a/src/java.base/share/classes/sun/nio/cs/SingleByte.java b/src/java.base/share/classes/sun/nio/cs/SingleByte.java index 59887b944d3..d4127b7c043 100644 --- a/src/java.base/share/classes/sun/nio/cs/SingleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/SingleByte.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -201,6 +201,16 @@ public class SingleByte return encode(c) != UNMAPPABLE_ENCODING; } + public boolean canEncode(CharSequence cs) { + int length = cs.length(); + for (int i = 0; i < length; i++) { + if (!canEncode(cs.charAt(i))) { + return false; + } + } + return true; + } + public boolean isLegalReplacement(byte[] repl) { return ((repl.length == 1 && repl[0] == (byte)'?') || super.isLegalReplacement(repl)); diff --git a/src/java.base/share/classes/sun/nio/cs/US_ASCII.java b/src/java.base/share/classes/sun/nio/cs/US_ASCII.java index bb84ab1bd4b..61c4948e949 100644 --- a/src/java.base/share/classes/sun/nio/cs/US_ASCII.java +++ b/src/java.base/share/classes/sun/nio/cs/US_ASCII.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -139,6 +139,16 @@ public class US_ASCII return c < 0x80; } + public boolean canEncode(CharSequence cs) { + int length = cs.length(); + for (int i = 0; i < length; i++) { + if (!canEncode(cs.charAt(i))) { + return false; + } + } + return true; + } + public boolean isLegalReplacement(byte[] repl) { return (repl.length == 1 && repl[0] >= 0) || super.isLegalReplacement(repl); diff --git a/test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java b/test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java new file mode 100644 index 00000000000..ebfbc217a95 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.nio; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Fork(3) +public class CharsetCanEncode { + + private static final char ALEF_CHAR = '\u05d0'; + private static final String ALEF_STRING = "\u05d0"; + + // sun.nio.cs.US_ASCII + private CharsetEncoder ascii = Charset.forName("US-ASCII").newEncoder(); + + // sun.nio.cs.ISO_8859_1 + private CharsetEncoder iso88591 = Charset.forName("ISO-8859-1").newEncoder(); + + // sun.nio.cs.SingleByte + private CharsetEncoder iso88592 = Charset.forName("ISO-8859-2").newEncoder(); + + // sun.nio.cs.DoubleByte + private CharsetEncoder shiftjis = Charset.forName("Shift_JIS").newEncoder(); + + // sun.nio.cs.UTF_8 + private CharsetEncoder utf8 = Charset.forName("UTF-8").newEncoder(); + + // sun.nio.cs.UTF_16LE + private CharsetEncoder utf16le = Charset.forName("UTF-16LE").newEncoder(); + + @Benchmark + public boolean asciiCanEncodeCharYes() { + return ascii.canEncode('D'); + } + + @Benchmark + public boolean asciiCanEncodeStringYes() { + return ascii.canEncode("D"); + } + + @Benchmark + public boolean asciiCanEncodeCharNo() { + return ascii.canEncode(ALEF_CHAR); + } + + @Benchmark + public boolean asciiCanEncodeStringNo() { + return ascii.canEncode(ALEF_STRING); + } + + @Benchmark + public boolean iso88591CanEncodeCharYes() { + return iso88591.canEncode('D'); + } + + @Benchmark + public boolean iso88591CanEncodeStringYes() { + return iso88591.canEncode("D"); + } + + @Benchmark + public boolean iso88591CanEncodeCharNo() { + return iso88591.canEncode(ALEF_CHAR); + } + + @Benchmark + public boolean iso88591CanEncodeStringNo() { + return iso88591.canEncode(ALEF_STRING); + } + + @Benchmark + public boolean iso88592CanEncodeCharYes() { + return iso88592.canEncode('D'); + } + + @Benchmark + public boolean iso88592CanEncodeStringYes() { + return iso88592.canEncode("D"); + } + + @Benchmark + public boolean iso88592CanEncodeCharNo() { + return iso88592.canEncode(ALEF_CHAR); + } + + @Benchmark + public boolean iso88592CanEncodeStringNo() { + return iso88592.canEncode(ALEF_STRING); + } + + @Benchmark + public boolean shiftjisCanEncodeCharYes() { + return shiftjis.canEncode('D'); + } + + @Benchmark + public boolean shiftjisCanEncodeStringYes() { + return shiftjis.canEncode("D"); + } + + @Benchmark + public boolean shiftjisCanEncodeCharNo() { + return shiftjis.canEncode(ALEF_CHAR); + } + + @Benchmark + public boolean shiftjisCanEncodeStringNo() { + return shiftjis.canEncode(ALEF_STRING); + } + + @Benchmark + public boolean utf8CanEncodeCharYes() { + return utf8.canEncode('D'); + } + + @Benchmark + public boolean utf8CanEncodeStringYes() { + return utf8.canEncode("D"); + } + + @Benchmark + public boolean utf8CanEncodeCharNo() { + return utf8.canEncode(Character.MIN_SURROGATE); + } + + @Benchmark + public boolean utf8CanEncodeStringNo() { + return utf8.canEncode(String.valueOf(Character.MIN_SURROGATE)); + } + + @Benchmark + public boolean utf16leCanEncodeCharYes() { + return utf16le.canEncode('D'); + } + + @Benchmark + public boolean utf16leCanEncodeStringYes() { + return utf16le.canEncode("D"); + } + + @Benchmark + public boolean utf16leCanEncodeCharNo() { + return utf16le.canEncode(Character.MIN_SURROGATE); + } + + @Benchmark + public boolean utf16leCanEncodeStringNo() { + return utf16le.canEncode(String.valueOf(Character.MIN_SURROGATE)); + } +}