mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 03:58:21 +00:00
8376226: CharsetEncoder.canEncode(CharSequence) is much slower than necessary
Reviewed-by: alanb, naoto
This commit is contained in:
parent
40d1b642a4
commit
992a8ef46b
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -819,6 +819,12 @@ public abstract class Charset$Coder$ {
|
||||
*/
|
||||
public final $Otype$Buffer $code$($Itype$Buffer in)
|
||||
throws CharacterCodingException
|
||||
{
|
||||
// Delegate to the private two-argument overload with throwOnError == true,
// so a coding error is raised via cr.throwException() rather than being
// reported to the caller as a null result.
return $code$(in, true);
|
||||
}
|
||||
|
||||
private $Otype$Buffer $code$($Itype$Buffer in, boolean throwOnError)
|
||||
throws CharacterCodingException
|
||||
{
|
||||
int n = Math.min((int)(in.remaining() * average$ItypesPerOtype$()),
|
||||
ArraysSupport.SOFT_MAX_ARRAY_LENGTH);
|
||||
@ -844,7 +850,11 @@ public abstract class Charset$Coder$ {
|
||||
out = o;
|
||||
continue;
|
||||
}
|
||||
cr.throwException();
|
||||
if (throwOnError) {
|
||||
cr.throwException();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
out.flip();
|
||||
return out;
|
||||
@ -938,7 +948,8 @@ public abstract class Charset$Coder$ {
|
||||
try {
|
||||
onMalformedInput(CodingErrorAction.REPORT);
|
||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
encode(cb);
|
||||
ByteBuffer bb = encode(cb, false);
|
||||
return bb != null;
|
||||
} catch (CharacterCodingException x) {
|
||||
return false;
|
||||
} finally {
|
||||
@ -946,7 +957,6 @@ public abstract class Charset$Coder$ {
|
||||
onUnmappableCharacter(ua);
|
||||
reset();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -583,6 +583,16 @@ public class DoubleByte {
|
||||
return encodeChar(c) != UNMAPPABLE_ENCODING;
|
||||
}
|
||||
|
||||
public boolean canEncode(CharSequence cs) {
|
||||
int length = cs.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (!canEncode(cs.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected Surrogate.Parser sgp() {
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -136,6 +136,16 @@ public class ISO_8859_1
|
||||
return c <= '\u00FF';
|
||||
}
|
||||
|
||||
public boolean canEncode(CharSequence cs) {
|
||||
int length = cs.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (!canEncode(cs.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isLegalReplacement(byte[] repl) {
|
||||
return true; // we accept any byte value
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -201,6 +201,16 @@ public class SingleByte
|
||||
return encode(c) != UNMAPPABLE_ENCODING;
|
||||
}
|
||||
|
||||
public boolean canEncode(CharSequence cs) {
|
||||
int length = cs.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (!canEncode(cs.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isLegalReplacement(byte[] repl) {
|
||||
return ((repl.length == 1 && repl[0] == (byte)'?') ||
|
||||
super.isLegalReplacement(repl));
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -139,6 +139,16 @@ public class US_ASCII
|
||||
return c < 0x80;
|
||||
}
|
||||
|
||||
public boolean canEncode(CharSequence cs) {
|
||||
int length = cs.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (!canEncode(cs.charAt(i))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isLegalReplacement(byte[] repl) {
|
||||
return (repl.length == 1 && repl[0] >= 0) ||
|
||||
super.isLegalReplacement(repl);
|
||||
|
||||
187
test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java
Normal file
187
test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.nio;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Fork(3)
|
||||
public class CharsetCanEncode {
|
||||
|
||||
private static final char ALEF_CHAR = '\u05d0';
|
||||
private static final String ALEF_STRING = "\u05d0";
|
||||
|
||||
// sun.nio.cs.US_ASCII
|
||||
private CharsetEncoder ascii = Charset.forName("US-ASCII").newEncoder();
|
||||
|
||||
// sun.nio.cs.ISO_8859_1
|
||||
private CharsetEncoder iso88591 = Charset.forName("ISO-8859-1").newEncoder();
|
||||
|
||||
// sun.nio.cs.SingleByte
|
||||
private CharsetEncoder iso88592 = Charset.forName("ISO-8859-2").newEncoder();
|
||||
|
||||
// sun.nio.cs.DoubleByte
|
||||
private CharsetEncoder shiftjis = Charset.forName("Shift_JIS").newEncoder();
|
||||
|
||||
// sun.nio.cs.UTF_8
|
||||
private CharsetEncoder utf8 = Charset.forName("UTF-8").newEncoder();
|
||||
|
||||
// sun.nio.cs.UTF_16LE
|
||||
private CharsetEncoder utf16le = Charset.forName("UTF-16LE").newEncoder();
|
||||
|
||||
@Benchmark
|
||||
public boolean asciiCanEncodeCharYes() {
|
||||
return ascii.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean asciiCanEncodeStringYes() {
|
||||
return ascii.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean asciiCanEncodeCharNo() {
|
||||
return ascii.canEncode(ALEF_CHAR);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean asciiCanEncodeStringNo() {
|
||||
return ascii.canEncode(ALEF_STRING);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88591CanEncodeCharYes() {
|
||||
return iso88591.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88591CanEncodeStringYes() {
|
||||
return iso88591.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88591CanEncodeCharNo() {
|
||||
return iso88591.canEncode(ALEF_CHAR);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88591CanEncodeStringNo() {
|
||||
return iso88591.canEncode(ALEF_STRING);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88592CanEncodeCharYes() {
|
||||
return iso88592.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88592CanEncodeStringYes() {
|
||||
return iso88592.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88592CanEncodeCharNo() {
|
||||
return iso88592.canEncode(ALEF_CHAR);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean iso88592CanEncodeStringNo() {
|
||||
return iso88592.canEncode(ALEF_STRING);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean shiftjisCanEncodeCharYes() {
|
||||
return shiftjis.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean shiftjisCanEncodeStringYes() {
|
||||
return shiftjis.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean shiftjisCanEncodeCharNo() {
|
||||
return shiftjis.canEncode(ALEF_CHAR);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean shiftjisCanEncodeStringNo() {
|
||||
return shiftjis.canEncode(ALEF_STRING);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf8CanEncodeCharYes() {
|
||||
return utf8.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf8CanEncodeStringYes() {
|
||||
return utf8.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf8CanEncodeCharNo() {
|
||||
return utf8.canEncode(Character.MIN_SURROGATE);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf8CanEncodeStringNo() {
|
||||
return utf8.canEncode(String.valueOf(Character.MIN_SURROGATE));
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf16leCanEncodeCharYes() {
|
||||
return utf16le.canEncode('D');
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf16leCanEncodeStringYes() {
|
||||
return utf16le.canEncode("D");
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf16leCanEncodeCharNo() {
|
||||
return utf16le.canEncode(Character.MIN_SURROGATE);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public boolean utf16leCanEncodeStringNo() {
|
||||
return utf16le.canEncode(String.valueOf(Character.MIN_SURROGATE));
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user