mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-16 05:15:22 +00:00
7040220: java/char_encoding: Optimize UTF-8 charset for String.getBytes()/new String(byte[])
Implement sun.nio.cs.ArrayEncoder/ArrayDecoder in UTF-8. Reviewed-by: alanb
This commit is contained in:
parent
9678eb8476
commit
371bd92aef
@ -222,13 +222,13 @@ class StringCoding {
|
||||
off = 0;
|
||||
}
|
||||
}
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
if (cd instanceof ArrayDecoder) {
|
||||
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
|
||||
return safeTrim(ca, clen, cs, isTrusted);
|
||||
} else {
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
@ -356,13 +356,13 @@ class StringCoding {
|
||||
off = 0;
|
||||
}
|
||||
}
|
||||
ce.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
if (ce instanceof ArrayEncoder) {
|
||||
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
|
||||
return safeTrim(ba, blen, cs, isTrusted);
|
||||
} else {
|
||||
ce.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca, off, len);
|
||||
try {
|
||||
|
||||
@ -34,6 +34,8 @@ import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.Arrays;
|
||||
import sun.nio.cs.ArrayDecoder;
|
||||
import sun.nio.cs.ArrayEncoder;
|
||||
|
||||
/**
|
||||
* Utility class for zipfile name and comment decoding and encoding
|
||||
@ -47,6 +49,15 @@ final class ZipCoder {
|
||||
char[] ca = new char[len];
|
||||
if (len == 0)
|
||||
return new String(ca);
|
||||
// UTF-8 only for now. Other ArrayDecoders only handle
|
||||
// CodingErrorAction.REPLACE mode. ZipCoder uses
|
||||
// REPORT mode.
|
||||
if (isUTF8 && cd instanceof ArrayDecoder) {
|
||||
int clen = ((ArrayDecoder)cd).decode(ba, 0, length, ca);
|
||||
if (clen == -1) // malformed
|
||||
throw new IllegalArgumentException("MALFORMED");
|
||||
return new String(ca, 0, clen);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, 0, length);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
@ -69,6 +80,14 @@ final class ZipCoder {
|
||||
byte[] ba = new byte[len];
|
||||
if (len == 0)
|
||||
return ba;
|
||||
// UTF-8 only for now. Other ArrayEncoders only handle
|
||||
// CodingErrorAction.REPLACE mode.
|
||||
if (isUTF8 && ce instanceof ArrayEncoder) {
|
||||
int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
|
||||
if (blen == -1) // malformed
|
||||
throw new IllegalArgumentException("MALFORMED");
|
||||
return Arrays.copyOf(ba, blen);
|
||||
}
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
CoderResult cr = ce.encode(cb, bb, true);
|
||||
@ -85,7 +104,7 @@ final class ZipCoder {
|
||||
|
||||
// assume invoked only if "this" is not utf8
|
||||
byte[] getBytesUTF8(String s) {
|
||||
if (isutf8)
|
||||
if (isUTF8)
|
||||
return getBytes(s);
|
||||
if (utf8 == null)
|
||||
utf8 = new ZipCoder(StandardCharset.UTF_8);
|
||||
@ -94,7 +113,7 @@ final class ZipCoder {
|
||||
|
||||
|
||||
String toStringUTF8(byte[] ba, int len) {
|
||||
if (isutf8)
|
||||
if (isUTF8)
|
||||
return toString(ba, len);
|
||||
if (utf8 == null)
|
||||
utf8 = new ZipCoder(StandardCharset.UTF_8);
|
||||
@ -102,18 +121,18 @@ final class ZipCoder {
|
||||
}
|
||||
|
||||
boolean isUTF8() {
|
||||
return isutf8;
|
||||
return isUTF8;
|
||||
}
|
||||
|
||||
private Charset cs;
|
||||
private CharsetDecoder dec;
|
||||
private CharsetEncoder enc;
|
||||
private boolean isutf8;
|
||||
private boolean isUTF8;
|
||||
private ZipCoder utf8;
|
||||
|
||||
private ZipCoder(Charset cs) {
|
||||
this.cs = cs;
|
||||
this.isutf8 = cs.name().equals(StandardCharset.UTF_8.name());
|
||||
this.isUTF8 = cs.name().equals(StandardCharset.UTF_8.name());
|
||||
}
|
||||
|
||||
static ZipCoder get(Charset charset) {
|
||||
|
||||
@ -32,6 +32,7 @@ import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
|
||||
/* Legal UTF-8 Byte Sequences
|
||||
*
|
||||
@ -77,7 +78,8 @@ class UTF_8 extends Unicode
|
||||
dst.position(dp - dst.arrayOffset());
|
||||
}
|
||||
|
||||
private static class Decoder extends CharsetDecoder {
|
||||
private static class Decoder extends CharsetDecoder
|
||||
implements ArrayDecoder {
|
||||
private Decoder(Charset cs) {
|
||||
super(cs, 1.0f, 1.0f);
|
||||
}
|
||||
@ -353,9 +355,132 @@ class UTF_8 extends Unicode
|
||||
else
|
||||
return decodeBufferLoop(src, dst);
|
||||
}
|
||||
|
||||
private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
|
||||
{
|
||||
if (bb == null)
|
||||
bb = ByteBuffer.wrap(ba);
|
||||
bb.position(sp);
|
||||
return bb;
|
||||
}
|
||||
|
||||
// Array-based decode: reads the len bytes sa[sp .. sp+len) as UTF-8 and
// writes the resulting chars into da starting at index 0. Returns the
// number of chars written. Each malformed sequence produces the first
// char of replacement() when the malformed-input action is REPLACE.
//
// returns -1 if there is malformed byte(s) and the
|
||||
// "action" for malformed input is not REPLACE.
|
||||
public int decode(byte[] sa, int sp, int len, char[] da) {
|
||||
// sl is the exclusive end index of the input; dp is the write index in da
final int sl = sp + len;
|
||||
int dp = 0;
|
||||
// bound for the ASCII loop: no more than the input length or the room in da
int dlASCII = Math.min(len, da.length);
|
||||
ByteBuffer bb = null; // only necessary if malformed
|
||||
|
||||
// ASCII only optimized loop
// (stops at the first byte with the high bit set, i.e. a negative byte)
|
||||
while (dp < dlASCII && sa[sp] >= 0)
|
||||
da[dp++] = (char) sa[sp++];
|
||||
|
||||
// general loop: dispatch on the lead byte b1. b1 is a sign-extended byte,
// so the tag-bit tests below compare against -2 after an arithmetic shift.
while (sp < sl) {
|
||||
int b1 = sa[sp++];
|
||||
if (b1 >= 0) {
|
||||
// 1 byte, 7 bits: 0xxxxxxx
|
||||
da[dp++] = (char) b1;
|
||||
} else if ((b1 >> 5) == -2) {
|
||||
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
|
||||
if (sp < sl) {
|
||||
int b2 = sa[sp++];
|
||||
if (isMalformed2(b1, b2)) {
|
||||
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
// step back so b2 is examined again as the start of the next sequence
sp--; // malformedN(bb, 2) always returns 1
|
||||
} else {
|
||||
// XOR-ing with the combined tag constant removes the 110/10 marker
// bits (and the sign extension of both bytes) in one step
da[dp++] = (char) (((b1 << 6) ^ b2)^
|
||||
(((byte) 0xC0 << 6) ^
|
||||
((byte) 0x80 << 0)));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// input ends right after the lead byte: one replacement (or -1) and stop
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
return dp;
|
||||
} else if ((b1 >> 4) == -2) {
|
||||
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
|
||||
if (sp + 1 < sl) {
|
||||
int b2 = sa[sp++];
|
||||
int b3 = sa[sp++];
|
||||
if (isMalformed3(b1, b2, b3)) {
|
||||
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
// rewind to the lead byte, then skip as many bytes as malformedN
// reports for the malformed sequence
// NOTE(review): getByteBuffer wraps the whole array, so the buffer's
// limit is sa.length rather than sl -- presumably harmless here, confirm
sp -=3;
|
||||
bb = getByteBuffer(bb, sa, sp);
|
||||
sp += malformedN(bb, 3).length();
|
||||
} else {
|
||||
da[dp++] = (char)((b1 << 12) ^
|
||||
(b2 << 6) ^
|
||||
(b3 ^
|
||||
(((byte) 0xE0 << 12) ^
|
||||
((byte) 0x80 << 6) ^
|
||||
((byte) 0x80 << 0))));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// fewer than two bytes follow the lead byte: one replacement (or -1) and stop
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
return dp;
|
||||
} else if ((b1 >> 3) == -2) {
|
||||
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
if (sp + 2 < sl) {
|
||||
int b2 = sa[sp++];
|
||||
int b3 = sa[sp++];
|
||||
int b4 = sa[sp++];
|
||||
// assemble the code point first; it is validated just below
int uc = ((b1 << 18) ^
|
||||
(b2 << 12) ^
|
||||
(b3 << 6) ^
|
||||
(b4 ^
|
||||
(((byte) 0xF0 << 18) ^
|
||||
((byte) 0x80 << 12) ^
|
||||
((byte) 0x80 << 6) ^
|
||||
((byte) 0x80 << 0))));
|
||||
if (isMalformed4(b2, b3, b4) ||
|
||||
// shortest form check
|
||||
!Character.isSupplementaryCodePoint(uc)) {
|
||||
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
// rewind to the lead byte, then skip the length malformedN reports
sp -= 4;
|
||||
bb = getByteBuffer(bb, sa, sp);
|
||||
sp += malformedN(bb, 4).length();
|
||||
} else {
|
||||
// a supplementary code point becomes a surrogate pair (two chars)
da[dp++] = Character.highSurrogate(uc);
|
||||
da[dp++] = Character.lowSurrogate(uc);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// fewer than three bytes follow the lead byte: one replacement (or -1) and stop
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
return dp;
|
||||
} else {
|
||||
// b1 matches none of the 1- to 4-byte lead patterns tested above
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement().charAt(0);
|
||||
sp--;
|
||||
bb = getByteBuffer(bb, sa, sp);
|
||||
CoderResult cr = malformedN(bb, 1);
|
||||
if (!cr.isError()) {
|
||||
// leading byte for 5 or 6-byte, but don't have enough
|
||||
// bytes in buffer to check. Consumed rest as malformed.
|
||||
return dp;
|
||||
}
|
||||
sp += cr.length();
|
||||
}
|
||||
}
|
||||
return dp;
|
||||
}
|
||||
}
|
||||
|
||||
private static class Encoder extends CharsetEncoder {
|
||||
private static class Encoder extends CharsetEncoder
|
||||
implements ArrayEncoder {
|
||||
|
||||
private Encoder(Charset cs) {
|
||||
super(cs, 1.1f, 3.0f);
|
||||
@ -495,5 +620,50 @@ class UTF_8 extends Unicode
|
||||
else
|
||||
return encodeBufferLoop(src, dst);
|
||||
}
|
||||
|
||||
// returns -1 if there is malformed char(s) and the
|
||||
// "action" for malformed input is not REPLACE.
|
||||
public int encode(char[] sa, int sp, int len, byte[] da) {
|
||||
int sl = sp + len;
|
||||
int dp = 0;
|
||||
int dlASCII = dp + Math.min(len, da.length);
|
||||
|
||||
// ASCII only optimized loop
|
||||
while (dp < dlASCII && sa[sp] < '\u0080')
|
||||
da[dp++] = (byte) sa[sp++];
|
||||
|
||||
while (sp < sl) {
|
||||
char c = sa[sp++];
|
||||
if (c < 0x80) {
|
||||
// Have at most seven bits
|
||||
da[dp++] = (byte)c;
|
||||
} else if (c < 0x800) {
|
||||
// 2 bytes, 11 bits
|
||||
da[dp++] = (byte)(0xc0 | (c >> 6));
|
||||
da[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
int uc = sgp.parse(c, sa, sp - 1, sl);
|
||||
if (uc < 0) {
|
||||
if (malformedInputAction() != CodingErrorAction.REPLACE)
|
||||
return -1;
|
||||
da[dp++] = replacement()[0];
|
||||
} else {
|
||||
da[dp++] = (byte)(0xf0 | ((uc >> 18)));
|
||||
da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
|
||||
da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
|
||||
da[dp++] = (byte)(0x80 | (uc & 0x3f));
|
||||
sp++; // 2 chars
|
||||
}
|
||||
} else {
|
||||
// 3 bytes, 16 bits
|
||||
da[dp++] = (byte)(0xe0 | ((c >> 12)));
|
||||
da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
|
||||
da[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
}
|
||||
}
|
||||
return dp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
92
jdk/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
Normal file
92
jdk/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import java.nio.*;
|
||||
import java.nio.charset.*;
|
||||
|
||||
public class StrCodingBenchmarkUTF8 {
|
||||
|
||||
public static void main(String[] args) throws Throwable {
|
||||
|
||||
final int itrs = Integer.getInteger("iterations", 100000);
|
||||
final int size = 2048;
|
||||
final int subsize = Integer.getInteger("subsize", 128);
|
||||
final Random rnd = new Random();
|
||||
final int maxchar = 0x7f;
|
||||
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
final String csn = charset.name();
|
||||
final Charset cs = charset;
|
||||
|
||||
int[] starts = new int[] { 0, 0x80, 0x800, 0x10000};
|
||||
for (int nb = 1; nb <= 4; nb++) {
|
||||
|
||||
final CharsetEncoder enc = cs.newEncoder();
|
||||
|
||||
char[] cc = new char[size];
|
||||
int i = 0;
|
||||
while (i < size - 3) {
|
||||
i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i);
|
||||
}
|
||||
|
||||
final String string = new String(cc);
|
||||
final byte[] bytes = string.getBytes(cs);
|
||||
|
||||
System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb);
|
||||
int sz = 12;
|
||||
while (sz < size) {
|
||||
System.out.printf(" [len=%d]%n", sz);
|
||||
final byte[] bs = Arrays.copyOf(bytes, sz);
|
||||
final String str = new String(bs, csn);
|
||||
StrCodingBenchmark.Job[] jobs = {
|
||||
new StrCodingBenchmark.Job("String decode: csn") {
|
||||
public void work() throws Throwable {
|
||||
for (int i = 0; i < itrs; i++)
|
||||
new String(bs, csn);
|
||||
}},
|
||||
|
||||
new StrCodingBenchmark.Job("String decode: cs") {
|
||||
public void work() throws Throwable {
|
||||
for (int i = 0; i < itrs; i++)
|
||||
new String(bs, cs);
|
||||
}},
|
||||
|
||||
new StrCodingBenchmark.Job("String encode: csn") {
|
||||
public void work() throws Throwable {
|
||||
for (int i = 0; i < itrs; i++)
|
||||
str.getBytes(csn);
|
||||
}},
|
||||
|
||||
new StrCodingBenchmark.Job("String encode: cs") {
|
||||
public void work() throws Throwable {
|
||||
for (int i = 0; i < itrs; i++)
|
||||
str.getBytes(cs);
|
||||
}},
|
||||
};
|
||||
StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs));
|
||||
sz <<= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -24,7 +24,7 @@
|
||||
*/
|
||||
|
||||
/* @test
|
||||
@bug 6636323 6636319
|
||||
@bug 6636323 6636319 7040220
|
||||
@summary Test if StringCoding and NIO result have the same de/encoding result
|
||||
* @run main/othervm/timeout=2000 TestStringCoding
|
||||
*/
|
||||
@ -111,6 +111,8 @@ public class TestStringCoding {
|
||||
//encode unmappable surrogates
|
||||
if (enc instanceof sun.nio.cs.ArrayEncoder &&
|
||||
cs.contains(Charset.forName("ASCII"))) {
|
||||
if (cs.name().equals("UTF-8")) // utf8 handles surrogates
|
||||
return;
|
||||
enc.replaceWith(new byte[] { (byte)'A'});
|
||||
sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
|
||||
|
||||
|
||||
166
jdk/test/sun/nio/cs/TestStringCodingUTF8.java
Normal file
166
jdk/test/sun/nio/cs/TestStringCodingUTF8.java
Normal file
@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/* @test
|
||||
@bug 7040220
|
||||
@summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8
|
||||
* @run main/othervm/timeout=2000 TestStringCodingUTF8
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import java.nio.*;
|
||||
import java.nio.charset.*;
|
||||
|
||||
/**
 * Cross-checks the String-level UTF-8 conversions ({@code String.getBytes}
 * and {@code new String(byte[], ...)}) against a plain
 * {@code CharsetEncoder}/{@code CharsetDecoder} run in REPLACE mode.
 *
 * All randomized input is derived from one seed. The seed is printed at the
 * start of each pass and can be supplied again with {@code -Dseed=<n>} to
 * replay a failing run.
 */
public class TestStringCodingUTF8 {
    public static void main(String[] args) throws Throwable {
        test();
        // security manager on
        System.setSecurityManager(new PermissiveSecurityManger());
        test();
    }

    /**
     * Runs one full pass: all BMP chars in order, all code points below
     * 0x20000 in shuffled order, random sub-ranges of the shuffled data and
     * finally random byte arrays (mostly malformed UTF-8).
     */
    static void test() throws Throwable {
        Charset cs = Charset.forName("UTF-8");

        long seed = Long.getLong("seed", new Random().nextLong());
        System.out.println("seed = " + seed);
        Random rnd = new Random(seed);

        char[] bmp = new char[0x10000];
        for (int i = 0; i < 0x10000; i++) {
            bmp[i] = (char)i;
        }
        test(cs, bmp, 0, bmp.length);

        List<Integer> list = new ArrayList<>(0x20000);
        for (int i = 0; i < 0x20000; i++) {
            list.add(i);
        }
        Collections.shuffle(list, rnd);
        int j = 0;
        // 0x10000 BMP code points take one char each, 0x10000 supplementary
        // code points take two: 0x30000 chars in total.
        char[] bmpsupp = new char[0x30000];
        for (int i = 0; i < 0x20000; i++) {
            j += Character.toChars(list.get(i), bmpsupp, j);
        }
        // explicit check: a bare assert is skipped unless the VM runs with -ea
        if (j != bmpsupp.length)
            throw new RuntimeException("unexpected char count: " + j);
        test(cs, bmpsupp, 0, bmpsupp.length);

        // randomized "off" and "len" on shuffled data
        int maxlen = 1000;
        int itr = 5000;
        for (int i = 0; i < itr; i++) {
            int off = rnd.nextInt(bmpsupp.length - maxlen);
            int len = rnd.nextInt(maxlen);
            test(cs, bmpsupp, off, len);
        }

        // random length of bytes, test the edge corner case
        for (int i = 0; i < itr; i++) {
            byte[] ba = new byte[rnd.nextInt(maxlen)];
            rnd.nextBytes(ba);
            String expected = new String(decode(cs, ba, 0, ba.length));
            //new String(csn);
            if (!new String(ba, cs.name()).equals(expected))
                throw new RuntimeException("new String(csn) failed");
            //new String(cs);
            if (!new String(ba, cs).equals(expected))
                throw new RuntimeException("new String(cs) failed");
        }
        System.out.println("done!");
    }

    /**
     * Checks the four String conversions for the chars ca[off .. off+len):
     * both getBytes variants must equal the reference encoding, and decoding
     * those bytes with both String constructors must equal the reference
     * decoding.
     *
     * @throws RuntimeException naming the first conversion that disagrees
     */
    static void test(Charset cs, char[] ca, int off, int len) throws Throwable {
        String str = new String(ca, off, len);
        byte[] ba = encode(cs, ca, off, len);

        //getBytes(csn);
        byte[] baStr = str.getBytes(cs.name());
        if (!Arrays.equals(ba, baStr))
            throw new RuntimeException("getBytes(csn) failed");

        //getBytes(cs);
        baStr = str.getBytes(cs);
        if (!Arrays.equals(ba, baStr))
            throw new RuntimeException("getBytes(cs) failed");

        String expected = new String(decode(cs, ba, 0, ba.length));

        //new String(csn);
        if (!new String(ba, cs.name()).equals(expected))
            throw new RuntimeException("new String(csn) failed");

        //new String(cs);
        if (!new String(ba, cs).equals(expected))
            throw new RuntimeException("new String(cs) failed");
    }

    // copy/paste of the StringCoding.decode()
    // Reference decoder: a fresh CharsetDecoder in REPLACE mode, run over
    // ba[off .. off+len); returns exactly the chars produced.
    static char[] decode(Charset cs, byte[] ba, int off, int len) {
        CharsetDecoder cd = cs.newDecoder();
        int en = (int)(len * cd.maxCharsPerByte());
        char[] ca = new char[en];
        if (len == 0)
            return ca;
        cd.onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();

        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
        CharBuffer cb = CharBuffer.wrap(ca);
        try {
            CoderResult cr = cd.decode(bb, cb, true);
            if (!cr.isUnderflow())
                cr.throwException();
            cr = cd.flush(cb);
            if (!cr.isUnderflow())
                cr.throwException();
        } catch (CharacterCodingException x) {
            throw new Error(x);
        }
        return Arrays.copyOf(ca, cb.position());
    }

    // copy/paste of the StringCoding.encode()
    // Reference encoder: a fresh CharsetEncoder in REPLACE mode, run over
    // ca[off .. off+len); returns exactly the bytes produced.
    static byte[] encode(Charset cs, char[] ca, int off, int len) {
        CharsetEncoder ce = cs.newEncoder();
        int en = (int)(len * ce.maxBytesPerChar());
        byte[] ba = new byte[en];
        if (len == 0)
            return ba;
        ce.onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .reset();
        ByteBuffer bb = ByteBuffer.wrap(ba);
        CharBuffer cb = CharBuffer.wrap(ca, off, len);
        try {
            CoderResult cr = ce.encode(cb, bb, true);
            if (!cr.isUnderflow())
                cr.throwException();
            cr = ce.flush(bb);
            if (!cr.isUnderflow())
                cr.throwException();
        } catch (CharacterCodingException x) {
            throw new Error(x);
        }
        return Arrays.copyOf(ba, bb.position());
    }

    // Security manager that grants every permission; installed only so the
    // second pass runs with a non-null security manager.
    static class PermissiveSecurityManger extends SecurityManager {
        @Override public void checkPermission(java.security.Permission p) {}
    }
}
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 4486841
|
||||
* @bug 4486841 7040220
|
||||
* @summary Test UTF-8 charset
|
||||
*/
|
||||
|
||||
@ -70,6 +70,32 @@ public class TestUTF8 {
|
||||
return dec.decode(bbf, cbf, true);
|
||||
}
|
||||
|
||||
// copy/paste of the StringCoding.decode()
|
||||
static char[] decode(Charset cs, byte[] ba, int off, int len) {
|
||||
CharsetDecoder cd = cs.newDecoder();
|
||||
int en = (int)(len * cd.maxCharsPerByte());
|
||||
char[] ca = new char[en];
|
||||
if (len == 0)
|
||||
return ca;
|
||||
cd.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE)
|
||||
.reset();
|
||||
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
|
||||
CharBuffer cb = CharBuffer.wrap(ca);
|
||||
try {
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new Error(x);
|
||||
}
|
||||
return Arrays.copyOf(ca, cb.position());
|
||||
}
|
||||
|
||||
static byte[] encode(char[] cc, String csn, boolean testDirect)
|
||||
throws Exception {
|
||||
ByteBuffer bbf;
|
||||
@ -142,7 +168,14 @@ public class TestUTF8 {
|
||||
bb = encode(cc, csn, true);
|
||||
ccO = decode(bb, csn, true);
|
||||
if (!Arrays.equals(cc, ccO)) {
|
||||
System.out.printf(" (direct) failed");
|
||||
System.out.print(" (direct) failed");
|
||||
}
|
||||
// String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
|
||||
if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
|
||||
System.out.printf(" String.getBytes() failed");
|
||||
}
|
||||
if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
|
||||
System.out.printf(" String.toCharArray() failed");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
@ -168,6 +201,12 @@ public class TestUTF8 {
|
||||
if (!Arrays.equals(cc, ccO)) {
|
||||
System.out.printf(" decoding(direct) failed%n");
|
||||
}
|
||||
// new String(bb, csn).getBytes(csn) will not return
|
||||
// the 6 bytes surrogates as in bb, so only test
|
||||
// toCharArray() here.
|
||||
if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
|
||||
System.out.printf(" String.toCharArray() failed");
|
||||
}
|
||||
}
|
||||
|
||||
static void compare(String csn1, String csn2) throws Exception {
|
||||
@ -274,6 +313,7 @@ public class TestUTF8 {
|
||||
static void checkMalformed(String csn) throws Exception {
|
||||
boolean failed = false;
|
||||
System.out.printf(" Check malformed <%s>...%n", csn);
|
||||
Charset cs = Charset.forName(csn);
|
||||
for (boolean direct: new boolean[] {false, true}) {
|
||||
for (byte[] bins : malformed) {
|
||||
int mlen = bins[0];
|
||||
@ -285,10 +325,15 @@ public class TestUTF8 {
|
||||
ashex += Integer.toBinaryString((int)bin[i] & 0xff);
|
||||
}
|
||||
if (!cr.isMalformed()) {
|
||||
System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
|
||||
System.out.printf(" FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
|
||||
failed = true;
|
||||
} else if (cr.length() != mlen) {
|
||||
System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
|
||||
System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
|
||||
failed = true;
|
||||
}
|
||||
if (!Arrays.equals(decode(cs, bin, 0, bin.length),
|
||||
new String(bin, csn).toCharArray())) {
|
||||
System.out.printf(" FAIL(new String(bb, %s)) failed%n", csn);
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user