mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 03:58:21 +00:00
8376226: CharsetEncoder.canEncode(CharSequence) is much slower than necessary
Reviewed-by: alanb, naoto
This commit is contained in:
parent
40d1b642a4
commit
992a8ef46b
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -819,6 +819,12 @@ public abstract class Charset$Coder$ {
|
|||||||
*/
|
*/
|
||||||
public final $Otype$Buffer $code$($Itype$Buffer in)
|
public final $Otype$Buffer $code$($Itype$Buffer in)
|
||||||
throws CharacterCodingException
|
throws CharacterCodingException
|
||||||
|
{
|
||||||
|
return $code$(in, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private $Otype$Buffer $code$($Itype$Buffer in, boolean throwOnError)
|
||||||
|
throws CharacterCodingException
|
||||||
{
|
{
|
||||||
int n = Math.min((int)(in.remaining() * average$ItypesPerOtype$()),
|
int n = Math.min((int)(in.remaining() * average$ItypesPerOtype$()),
|
||||||
ArraysSupport.SOFT_MAX_ARRAY_LENGTH);
|
ArraysSupport.SOFT_MAX_ARRAY_LENGTH);
|
||||||
@ -844,7 +850,11 @@ public abstract class Charset$Coder$ {
|
|||||||
out = o;
|
out = o;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cr.throwException();
|
if (throwOnError) {
|
||||||
|
cr.throwException();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
out.flip();
|
out.flip();
|
||||||
return out;
|
return out;
|
||||||
@ -938,7 +948,8 @@ public abstract class Charset$Coder$ {
|
|||||||
try {
|
try {
|
||||||
onMalformedInput(CodingErrorAction.REPORT);
|
onMalformedInput(CodingErrorAction.REPORT);
|
||||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||||
encode(cb);
|
ByteBuffer bb = encode(cb, false);
|
||||||
|
return bb != null;
|
||||||
} catch (CharacterCodingException x) {
|
} catch (CharacterCodingException x) {
|
||||||
return false;
|
return false;
|
||||||
} finally {
|
} finally {
|
||||||
@ -946,7 +957,6 @@ public abstract class Charset$Coder$ {
|
|||||||
onUnmappableCharacter(ua);
|
onUnmappableCharacter(ua);
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2009, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -583,6 +583,16 @@ public class DoubleByte {
|
|||||||
return encodeChar(c) != UNMAPPABLE_ENCODING;
|
return encodeChar(c) != UNMAPPABLE_ENCODING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canEncode(CharSequence cs) {
|
||||||
|
int length = cs.length();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (!canEncode(cs.charAt(i))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
protected Surrogate.Parser sgp() {
|
protected Surrogate.Parser sgp() {
|
||||||
if (sgp == null)
|
if (sgp == null)
|
||||||
sgp = new Surrogate.Parser();
|
sgp = new Surrogate.Parser();
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -136,6 +136,16 @@ public class ISO_8859_1
|
|||||||
return c <= '\u00FF';
|
return c <= '\u00FF';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canEncode(CharSequence cs) {
|
||||||
|
int length = cs.length();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (!canEncode(cs.charAt(i))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isLegalReplacement(byte[] repl) {
|
public boolean isLegalReplacement(byte[] repl) {
|
||||||
return true; // we accept any byte value
|
return true; // we accept any byte value
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2008, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -201,6 +201,16 @@ public class SingleByte
|
|||||||
return encode(c) != UNMAPPABLE_ENCODING;
|
return encode(c) != UNMAPPABLE_ENCODING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canEncode(CharSequence cs) {
|
||||||
|
int length = cs.length();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (!canEncode(cs.charAt(i))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isLegalReplacement(byte[] repl) {
|
public boolean isLegalReplacement(byte[] repl) {
|
||||||
return ((repl.length == 1 && repl[0] == (byte)'?') ||
|
return ((repl.length == 1 && repl[0] == (byte)'?') ||
|
||||||
super.isLegalReplacement(repl));
|
super.isLegalReplacement(repl));
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -139,6 +139,16 @@ public class US_ASCII
|
|||||||
return c < 0x80;
|
return c < 0x80;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canEncode(CharSequence cs) {
|
||||||
|
int length = cs.length();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (!canEncode(cs.charAt(i))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isLegalReplacement(byte[] repl) {
|
public boolean isLegalReplacement(byte[] repl) {
|
||||||
return (repl.length == 1 && repl[0] >= 0) ||
|
return (repl.length == 1 && repl[0] >= 0) ||
|
||||||
super.isLegalReplacement(repl);
|
super.isLegalReplacement(repl);
|
||||||
|
|||||||
187
test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java
Normal file
187
test/micro/org/openjdk/bench/java/nio/CharsetCanEncode.java
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.java.nio;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.CharsetEncoder;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
@Fork(3)
|
||||||
|
public class CharsetCanEncode {
|
||||||
|
|
||||||
|
private static final char ALEF_CHAR = '\u05d0';
|
||||||
|
private static final String ALEF_STRING = "\u05d0";
|
||||||
|
|
||||||
|
// sun.nio.cs.US_ASCII
|
||||||
|
private CharsetEncoder ascii = Charset.forName("US-ASCII").newEncoder();
|
||||||
|
|
||||||
|
// sun.nio.cs.ISO_8859_1
|
||||||
|
private CharsetEncoder iso88591 = Charset.forName("ISO-8859-1").newEncoder();
|
||||||
|
|
||||||
|
// sun.nio.cs.SingleByte
|
||||||
|
private CharsetEncoder iso88592 = Charset.forName("ISO-8859-2").newEncoder();
|
||||||
|
|
||||||
|
// sun.nio.cs.DoubleByte
|
||||||
|
private CharsetEncoder shiftjis = Charset.forName("Shift_JIS").newEncoder();
|
||||||
|
|
||||||
|
// sun.nio.cs.UTF_8
|
||||||
|
private CharsetEncoder utf8 = Charset.forName("UTF-8").newEncoder();
|
||||||
|
|
||||||
|
// sun.nio.cs.UTF_16LE
|
||||||
|
private CharsetEncoder utf16le = Charset.forName("UTF-16LE").newEncoder();
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean asciiCanEncodeCharYes() {
|
||||||
|
return ascii.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean asciiCanEncodeStringYes() {
|
||||||
|
return ascii.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean asciiCanEncodeCharNo() {
|
||||||
|
return ascii.canEncode(ALEF_CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean asciiCanEncodeStringNo() {
|
||||||
|
return ascii.canEncode(ALEF_STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88591CanEncodeCharYes() {
|
||||||
|
return iso88591.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88591CanEncodeStringYes() {
|
||||||
|
return iso88591.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88591CanEncodeCharNo() {
|
||||||
|
return iso88591.canEncode(ALEF_CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88591CanEncodeStringNo() {
|
||||||
|
return iso88591.canEncode(ALEF_STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88592CanEncodeCharYes() {
|
||||||
|
return iso88592.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88592CanEncodeStringYes() {
|
||||||
|
return iso88592.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88592CanEncodeCharNo() {
|
||||||
|
return iso88592.canEncode(ALEF_CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean iso88592CanEncodeStringNo() {
|
||||||
|
return iso88592.canEncode(ALEF_STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean shiftjisCanEncodeCharYes() {
|
||||||
|
return shiftjis.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean shiftjisCanEncodeStringYes() {
|
||||||
|
return shiftjis.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean shiftjisCanEncodeCharNo() {
|
||||||
|
return shiftjis.canEncode(ALEF_CHAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean shiftjisCanEncodeStringNo() {
|
||||||
|
return shiftjis.canEncode(ALEF_STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf8CanEncodeCharYes() {
|
||||||
|
return utf8.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf8CanEncodeStringYes() {
|
||||||
|
return utf8.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf8CanEncodeCharNo() {
|
||||||
|
return utf8.canEncode(Character.MIN_SURROGATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf8CanEncodeStringNo() {
|
||||||
|
return utf8.canEncode(String.valueOf(Character.MIN_SURROGATE));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf16leCanEncodeCharYes() {
|
||||||
|
return utf16le.canEncode('D');
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf16leCanEncodeStringYes() {
|
||||||
|
return utf16le.canEncode("D");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf16leCanEncodeCharNo() {
|
||||||
|
return utf16le.canEncode(Character.MIN_SURROGATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public boolean utf16leCanEncodeStringNo() {
|
||||||
|
return utf16le.canEncode(String.valueOf(Character.MIN_SURROGATE));
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user