mirror of
https://github.com/openjdk/jdk.git
synced 2026-04-13 16:38:50 +00:00
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
6229811: Several codepoints in EUC_TW failed in roundtrip conversion
Re-write EUC_TW charset to address the size and roundtrip issue.
Reviewed-by: alanb
This commit is contained in:
parent
6a308df390
commit
a1958b22ef
@ -827,8 +827,7 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
|
||||
|
||||
$(FILES_gensbcs_out): $(GENCSSRC)/SingleByte-X.java $(GENCSSRC)/sbcs
|
||||
@$(prep-target)
|
||||
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \
|
||||
$(GENCSSRC) $(SCS_GEN) sbcs
|
||||
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSSRC) $(SCS_GEN) sbcs
|
||||
|
||||
#
|
||||
# Generated file system implementation classes (Unix only)
|
||||
|
||||
@ -58,11 +58,12 @@ ifdef OPENJDK
|
||||
else
|
||||
RENDER_SUBDIR = dcpr
|
||||
endif
|
||||
# nio need to be compiled before awt to have all charsets ready
|
||||
SUBDIRS = jar security javazic misc net audio $(RENDER_SUBDIR) image \
|
||||
awt splashscreen $(XAWT_SUBDIR) \
|
||||
nio awt splashscreen $(XAWT_SUBDIR) \
|
||||
$(HEADLESS_SUBDIR) $(DGA_SUBDIR) \
|
||||
font jpeg cmm applet rmi beans $(JDBC_SUBDIR) \
|
||||
jawt text nio launcher management $(ORG_SUBDIR) \
|
||||
jawt text launcher management $(ORG_SUBDIR) \
|
||||
native2ascii serialver tools jconsole tracing
|
||||
|
||||
all build clean clobber::
|
||||
|
||||
@ -304,7 +304,7 @@ FILES_src = \
|
||||
sun/io/CharToByteMacUkraine.java \
|
||||
sun/io/CharToByteTIS620.java
|
||||
|
||||
FILES_gen_extsbcs = \
|
||||
FILES_gen_extcs = \
|
||||
sun/nio/cs/ext/IBM037.java \
|
||||
sun/nio/cs/ext/IBM1006.java \
|
||||
sun/nio/cs/ext/IBM1025.java \
|
||||
@ -374,6 +374,8 @@ FILES_gen_extsbcs = \
|
||||
sun/nio/cs/ext/MacThai.java \
|
||||
sun/nio/cs/ext/MacTurkish.java \
|
||||
sun/nio/cs/ext/MacUkraine.java \
|
||||
sun/nio/cs/ext/TIS_620.java
|
||||
sun/nio/cs/ext/TIS_620.java \
|
||||
sun/nio/cs/ext/EUC_TWMapping.java
|
||||
|
||||
FILES_java = $(FILES_src) $(FILES_gen_extcs)
|
||||
|
||||
FILES_java = $(FILES_src) $(FILES_gen_extsbcs)
|
||||
@ -61,14 +61,14 @@ endif # PLATFORM
|
||||
CHARSETS_JAR = $(LIBDIR)/charsets.jar
|
||||
|
||||
# extsbcs
|
||||
FILES_genout_extsbcs = $(FILES_gen_extsbcs:%.java=$(GENSRCDIR)/%.java)
|
||||
FILES_genout_extcs = $(FILES_gen_extcs:%.java=$(GENSRCDIR)/%.java)
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
include $(BUILDDIR)/common/Classes.gmk
|
||||
|
||||
build: $(FILES_genout_extsbcs) $(CHARSETS_JAR)
|
||||
build: $(FILES_genout_extcs) $(CHARSETS_JAR)
|
||||
|
||||
#
|
||||
# Extra rules to build character converters.
|
||||
@ -77,6 +77,7 @@ SERVICE_DESCRIPTION = java.nio.charset.spi.CharsetProvider
|
||||
SERVICE_DESCRIPTION_PATH = META-INF/services/$(SERVICE_DESCRIPTION)
|
||||
|
||||
GENCSDATASRC = $(BUILDDIR)/tools/CharsetMapping
|
||||
GENCSSRCDIR = $(BUILDDIR)/tools/src/build/tools/charsetmapping
|
||||
GENCSEXT = $(GENSRCDIR)/sun/nio/cs/ext
|
||||
|
||||
FILES_MAP = $(GENCSDATASRC)/sjis0213.map
|
||||
@ -86,16 +87,16 @@ CHARSETMAPPING_JARFILE = $(BUILDTOOLJARDIR)/charsetmapping.jar
|
||||
$(FILES_DAT): $(FILES_MAP)
|
||||
@$(prep-target)
|
||||
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) \
|
||||
$(FILES_MAP) $(FILES_DAT)
|
||||
$(FILES_MAP) $(FILES_DAT) sjis0213
|
||||
|
||||
|
||||
$(FILES_genout_extsbcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs
|
||||
$(FILES_genout_extcs): $(GENCSDATASRC)/SingleByte-X.java $(GENCSDATASRC)/extsbcs
|
||||
@$(prep-target)
|
||||
$(RM) -r $(GENCSEXT)
|
||||
$(MKDIR) -p $(GENCSEXT)
|
||||
$(BOOT_JAVA_CMD) -cp $(CHARSETMAPPING_JARFILE) build.tools.charsetmapping.GenerateSBCS \
|
||||
$(GENCSDATASRC) $(GENCSEXT) extsbcs
|
||||
|
||||
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) extsbcs
|
||||
$(BOOT_JAVA_CMD) -jar $(CHARSETMAPPING_JARFILE) $(GENCSDATASRC) $(GENCSEXT) euctw \
|
||||
$(GENCSSRCDIR)/GenerateEUC_TW.java
|
||||
|
||||
$(CLASSDESTDIR)/$(SERVICE_DESCRIPTION_PATH): \
|
||||
$(SHARE_SRC)/classes/sun/nio/cs/ext/$(SERVICE_DESCRIPTION_PATH)
|
||||
|
||||
@ -34,7 +34,7 @@ PROGRAM = charsetmapping
|
||||
include $(BUILDDIR)/common/Defs.gmk
|
||||
|
||||
BUILDTOOL_SOURCE_ROOT = $(BUILDDIR)/tools/src
|
||||
BUILDTOOL_MAIN = $(PKGDIR)/GenerateMapping.java
|
||||
BUILDTOOL_MAIN = $(PKGDIR)/Main.java
|
||||
|
||||
#
|
||||
# Build tool jar rules.
|
||||
|
||||
@ -36,7 +36,7 @@ import java.util.*;
|
||||
|
||||
public class CharsetMapping {
|
||||
public final static char UNMAPPABLE_DECODING = '\uFFFD';
|
||||
public final static int UNMAPPABLE_ENCODING = -1;
|
||||
public final static int UNMAPPABLE_ENCODING = 0xFFFD;
|
||||
|
||||
public static class Entry {
|
||||
public int bs; //byte sequence reps
|
||||
|
||||
@ -27,15 +27,11 @@ package build.tools.charsetmapping;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.regex.*;
|
||||
import build.tools.charsetmapping.GenerateSBCS;
|
||||
import static build.tools.charsetmapping.CharsetMapping.*;
|
||||
|
||||
public class GenerateMapping {
|
||||
public static void main(String argv[]) throws IOException {
|
||||
if (argv.length < 2) {
|
||||
System.out.println("Usage: java GenerateMapping fMap fDat");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
public static void genMapping(String argv[]) throws IOException {
|
||||
genDataJIS0213(new FileInputStream(argv[0]),
|
||||
new FileOutputStream(argv[1]));
|
||||
}
|
||||
|
||||
@ -34,11 +34,8 @@ import java.nio.charset.*;
|
||||
import static build.tools.charsetmapping.CharsetMapping.*;
|
||||
|
||||
public class GenerateSBCS {
|
||||
public static void main(String args[]) throws Exception {
|
||||
if (args.length < 3) {
|
||||
System.err.println("Usage: java GenSBCS srcDir dstDir config");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
public static void genSBCS(String args[]) throws Exception {
|
||||
|
||||
Scanner s = new Scanner(new File(args[0], args[2]));
|
||||
while (s.hasNextLine()) {
|
||||
|
||||
@ -39,30 +39,11 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
private final byte G4 = 4;
|
||||
private final byte MSB = (byte) 0x80;
|
||||
private final byte SS2 = (byte) 0x8E;
|
||||
private final byte P2 = (byte) 0xA2;
|
||||
private final byte P3 = (byte) 0xA3;
|
||||
|
||||
protected final char REPLACE_CHAR = '\uFFFD';
|
||||
|
||||
private byte firstByte = 0, state = G0;
|
||||
public static String unicodeCNS2, unicodeCNS3;
|
||||
private static String unicodeCNS4, unicodeCNS5, unicodeCNS6;
|
||||
private static String unicodeCNS7, unicodeCNS15;
|
||||
|
||||
private int cnsPlane = 0;
|
||||
private final static EUC_TW nioCoder = new EUC_TW();
|
||||
|
||||
public static String unicodeCNS1 = nioCoder.getUnicodeCNS1();
|
||||
|
||||
static String[] cnsChars = {
|
||||
unicodeCNS2 = nioCoder.getUnicodeCNS2(),
|
||||
unicodeCNS3 = nioCoder.getUnicodeCNS3(),
|
||||
unicodeCNS4 = nioCoder.getUnicodeCNS4(),
|
||||
unicodeCNS5 = nioCoder.getUnicodeCNS5(),
|
||||
unicodeCNS6 = nioCoder.getUnicodeCNS6(),
|
||||
unicodeCNS7 = nioCoder.getUnicodeCNS7(),
|
||||
unicodeCNS15 = nioCoder.getUnicodeCNS15()
|
||||
};
|
||||
private EUC_TW.Decoder dec = (EUC_TW.Decoder)(new EUC_TW().newDecoder());
|
||||
|
||||
public ByteToCharEUC_TW() {
|
||||
}
|
||||
@ -81,6 +62,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
dec.reset();
|
||||
state = G0;
|
||||
firstByte = 0;
|
||||
byteOff = charOff = 0;
|
||||
@ -95,7 +77,7 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
ConversionBufferFullException
|
||||
{
|
||||
int inputSize = 0;
|
||||
char outputChar = (char) 0;
|
||||
char[] c1 = new char[1];
|
||||
|
||||
byteOff = inOff;
|
||||
charOff = outOff;
|
||||
@ -104,11 +86,12 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
while (byteOff < inEnd) {
|
||||
if (charOff >= outEnd)
|
||||
throw new ConversionBufferFullException();
|
||||
|
||||
char[] outputChar = null;
|
||||
switch (state) {
|
||||
case G0:
|
||||
if ( (input[byteOff] & MSB) == 0) { // ASCII
|
||||
outputChar = (char) input[byteOff];
|
||||
outputChar = c1;
|
||||
outputChar[0] = (char) input[byteOff];
|
||||
} else if (input[byteOff] == SS2) { // Codeset 2
|
||||
state = G2;
|
||||
} else { // Codeset 1
|
||||
@ -119,9 +102,10 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
case G1:
|
||||
inputSize = 2;
|
||||
if ( (input[byteOff] & MSB) != 0) { // 2nd byte
|
||||
cnsPlane = 1;
|
||||
outputChar = convToUnicode(firstByte,
|
||||
input[byteOff], unicodeCNS1);
|
||||
cnsPlane = 0;
|
||||
outputChar = dec.toUnicode(firstByte & 0xff,
|
||||
input[byteOff] & 0xff,
|
||||
cnsPlane);
|
||||
} else { // Error
|
||||
badInputLength = 1;
|
||||
throw new MalformedInputException();
|
||||
@ -154,9 +138,9 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
break;
|
||||
case G4:
|
||||
if ( (input[byteOff] & MSB) != 0) { // 2nd byte
|
||||
outputChar = convToUnicode(firstByte,
|
||||
input[byteOff],
|
||||
cnsChars[cnsPlane - 2]);
|
||||
outputChar = dec.toUnicode(firstByte & 0xff,
|
||||
input[byteOff] & 0xff,
|
||||
cnsPlane - 1);
|
||||
} else { // Error
|
||||
badInputLength = 3;
|
||||
throw new MalformedInputException();
|
||||
@ -166,21 +150,19 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
break;
|
||||
}
|
||||
byteOff++;
|
||||
|
||||
if (outputChar != (char) 0) {
|
||||
if (outputChar == REPLACE_CHAR) {
|
||||
if (subMode) // substitution enabled
|
||||
outputChar = subChars[0];
|
||||
else {
|
||||
if (state == G0) {
|
||||
if (outputChar == null) {
|
||||
if (subMode) { // substitution enabled
|
||||
outputChar = c1;
|
||||
outputChar[0] = subChars[0];
|
||||
} else {
|
||||
badInputLength = inputSize;
|
||||
throw new UnknownCharacterException();
|
||||
}
|
||||
}
|
||||
output[charOff++] = outputChar;
|
||||
outputChar = 0;
|
||||
output[charOff++] = outputChar[0];
|
||||
}
|
||||
}
|
||||
|
||||
return charOff - outOff;
|
||||
}
|
||||
|
||||
@ -191,25 +173,4 @@ public class ByteToCharEUC_TW extends ByteToCharConverter
|
||||
public String getCharacterEncoding() {
|
||||
return "EUC_TW";
|
||||
}
|
||||
|
||||
protected char convToUnicode(byte byte1, byte byte2, String table)
|
||||
{
|
||||
int index;
|
||||
|
||||
if ((byte1 & 0xff) < 0xa1 || (byte2 & 0xff) < 0xa1 ||
|
||||
(byte1 & 0xff) > 0xfe || (byte2 & 0xff) > 0xfe)
|
||||
return REPLACE_CHAR;
|
||||
index = (((byte1 & 0xff) - 0xa1) * 94) + (byte2 & 0xff) - 0xa1;
|
||||
if (index < 0 || index >= table.length())
|
||||
return REPLACE_CHAR;
|
||||
|
||||
// Planes 3 and above containing zero value lead byte
|
||||
// to accommodate surrogates for mappings which decode to a surrogate
|
||||
// pair
|
||||
|
||||
if (this.cnsPlane >= 3)
|
||||
index = (index * 2) + 1;
|
||||
|
||||
return table.charAt(index);
|
||||
}
|
||||
}
|
||||
|
||||
@ -33,19 +33,7 @@ import sun.nio.cs.ext.EUC_TW;
|
||||
|
||||
public class CharToByteEUC_TW extends CharToByteConverter
|
||||
{
|
||||
private final byte MSB = (byte)0x80;
|
||||
private final byte SS2 = (byte) 0x8E;
|
||||
private final byte P2 = (byte) 0xA2;
|
||||
private final byte P3 = (byte) 0xA3;
|
||||
|
||||
private final static EUC_TW nioCoder = new EUC_TW();
|
||||
|
||||
private static String uniTab1 = nioCoder.getUniTab1();
|
||||
private static String uniTab2 = nioCoder.getUniTab2();
|
||||
private static String uniTab3 = nioCoder.getUniTab3();
|
||||
private static String cnsTab1 = nioCoder.getCNSTab1();
|
||||
private static String cnsTab2 = nioCoder.getCNSTab2();
|
||||
private static String cnsTab3 = nioCoder.getCNSTab3();
|
||||
private final EUC_TW.Encoder enc = (EUC_TW.Encoder)(new EUC_TW().newEncoder());
|
||||
|
||||
public int flush(byte[] output, int outStart, int outEnd)
|
||||
throws MalformedInputException
|
||||
@ -59,10 +47,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
|
||||
}
|
||||
|
||||
public boolean canConvert(char ch){
|
||||
if (((0xFF00 & ch) != 0) && (getNative(ch) != -1)){
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return enc.canEncode(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -74,9 +59,8 @@ public class CharToByteEUC_TW extends CharToByteConverter
|
||||
ConversionBufferFullException
|
||||
{
|
||||
int outputSize;
|
||||
byte [] tmpbuf = new byte[4];
|
||||
byte [] tmpbuf = new byte[4];;
|
||||
byte [] outputByte;
|
||||
|
||||
byteOff = outOff;
|
||||
|
||||
//Fixed 4122961 by bringing the charOff++ out to this
|
||||
@ -88,7 +72,7 @@ public class CharToByteEUC_TW extends CharToByteConverter
|
||||
outputSize = 1;
|
||||
outputByte[0] = (byte)(input[charOff] & 0x7f);
|
||||
} else {
|
||||
outputSize = unicodeToEUC(input[charOff], outputByte);
|
||||
outputSize = enc.toEUC(input[charOff], outputByte);
|
||||
}
|
||||
|
||||
if (outputSize == -1) {
|
||||
@ -112,7 +96,6 @@ public class CharToByteEUC_TW extends CharToByteConverter
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* returns the maximum number of bytes needed to convert a char
|
||||
*/
|
||||
@ -120,111 +103,10 @@ public class CharToByteEUC_TW extends CharToByteConverter
|
||||
return 4;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the character set ID
|
||||
*/
|
||||
public String getCharacterEncoding() {
|
||||
return "EUC_TW";
|
||||
}
|
||||
|
||||
|
||||
protected int getNative(char unicode) {
|
||||
int i,
|
||||
cns; // 2 chars in CNS table make 1 CNS code
|
||||
|
||||
if (unicode < UniTab2[0]) {
|
||||
if ((i = searchTab(unicode, UniTab1)) == -1)
|
||||
return -1;
|
||||
cns = (CNSTab1[2*i] << 16) + CNSTab1[2*i+1];
|
||||
return cns;
|
||||
} else if (unicode < UniTab3[0]) {
|
||||
if ((i = searchTab(unicode, UniTab2)) == -1)
|
||||
return -1;
|
||||
cns = (CNSTab2[2*i] << 16) + CNSTab2[2*i+1];
|
||||
return cns;
|
||||
} else {
|
||||
if ((i = searchTab(unicode, UniTab3)) == -1)
|
||||
return -1;
|
||||
cns = (CNSTab3[2*i] << 16) + CNSTab3[2*i+1];
|
||||
return cns;
|
||||
}
|
||||
}
|
||||
|
||||
protected int searchTab(char code, char [] table) {
|
||||
int i = 0, l, h;
|
||||
|
||||
for (l = 0, h = table.length - 1; l < h; ) {
|
||||
if (table[l] == code) {
|
||||
i = l;
|
||||
break;
|
||||
}
|
||||
if (table[h] == code) {
|
||||
i = h;
|
||||
break;
|
||||
}
|
||||
i = (l + h) / 2;
|
||||
if (table[i] == code)
|
||||
break;
|
||||
if (table[i] < code)
|
||||
l = i + 1;
|
||||
else h = i - 1;
|
||||
}
|
||||
if (code == table[i]) {
|
||||
return i;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private int unicodeToEUC(char unicode, byte ebyte[]) {
|
||||
int cns = getNative(unicode);
|
||||
|
||||
if ((cns >> 16) == 0x01) { // Plane 1
|
||||
ebyte[0] = (byte) (((cns & 0xff00) >> 8) | MSB);
|
||||
ebyte[1] = (byte) ((cns & 0xff) | MSB);
|
||||
return 2;
|
||||
}
|
||||
|
||||
byte cnsPlane = (byte)(cns >> 16);
|
||||
if (cnsPlane >= (byte)0x02) { // Plane 2
|
||||
ebyte[0] = SS2;
|
||||
ebyte[1] = (byte) (cnsPlane | (byte)0xA0);
|
||||
ebyte[2] = (byte) (((cns & 0xff00) >> 8) | MSB);
|
||||
ebyte[3] = (byte) ((cns & 0xff) | MSB);
|
||||
return 4;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
protected int unicodeToEUC(char unicode) {
|
||||
if (unicode <= 0x7F) { // ASCII falls into EUC_TW CS0
|
||||
return unicode;
|
||||
}
|
||||
|
||||
int cns = getNative(unicode);
|
||||
int plane = cns >> 16;
|
||||
int euc = (cns & 0x0000FFFF) | 0x00008080;
|
||||
|
||||
if (plane == 1) {
|
||||
return euc;
|
||||
} else if (plane == 2) {
|
||||
return ((SS2 << 24) & 0xFF000000) | ((P2 << 16) & 0x00FF0000) |
|
||||
euc;
|
||||
} else if (plane == 3) {
|
||||
return ((SS2 << 24) & 0xFF000000) | ((P3 << 16) & 0x00FF0000) |
|
||||
euc;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
private char [] UniTab1 = uniTab1.toCharArray();
|
||||
private char [] UniTab2 = uniTab2.toCharArray();
|
||||
private char [] UniTab3 = uniTab3.toCharArray();
|
||||
private char [] CNSTab1 = cnsTab1.toCharArray();
|
||||
private char [] CNSTab2 = cnsTab2.toCharArray();
|
||||
private char [] CNSTab3 = cnsTab3.toCharArray();
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -389,8 +389,8 @@ abstract class ISO2022
|
||||
protected static class Encoder extends CharsetEncoder {
|
||||
private final Surrogate.Parser sgp = new Surrogate.Parser();
|
||||
private final byte SS2 = (byte)0x8e;
|
||||
private final byte P2 = (byte)0xA2;
|
||||
private final byte P3 = (byte)0xA3;
|
||||
private final byte PLANE2 = (byte)0xA2;
|
||||
private final byte PLANE3 = (byte)0xA3;
|
||||
private final byte MSB = (byte)0x80;
|
||||
|
||||
protected final byte maximumDesignatorLength = 4;
|
||||
@ -460,32 +460,32 @@ abstract class ISO2022
|
||||
ebyte[index++] = (byte)(convByte[0] & 0x7f);
|
||||
ebyte[index++] = (byte)(convByte[1] & 0x7f);
|
||||
} else {
|
||||
if((convByte[0] == SS2) && (convByte[1] == P2)) {
|
||||
if (!SS2DesDefined) {
|
||||
newSS2DesDefined = true;
|
||||
ebyte[0] = ISO_ESC;
|
||||
tmpByte = SS2Desig.getBytes();
|
||||
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
|
||||
index = tmpByte.length+1;
|
||||
if(convByte[0] == SS2) {
|
||||
if (convByte[1] == PLANE2) {
|
||||
if (!SS2DesDefined) {
|
||||
newSS2DesDefined = true;
|
||||
ebyte[0] = ISO_ESC;
|
||||
tmpByte = SS2Desig.getBytes();
|
||||
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
|
||||
index = tmpByte.length+1;
|
||||
}
|
||||
ebyte[index++] = ISO_ESC;
|
||||
ebyte[index++] = ISO_SS2_7;
|
||||
ebyte[index++] = (byte)(convByte[2] & 0x7f);
|
||||
ebyte[index++] = (byte)(convByte[3] & 0x7f);
|
||||
} else if (convByte[1] == PLANE3) {
|
||||
if(!SS3DesDefined){
|
||||
newSS3DesDefined = true;
|
||||
ebyte[0] = ISO_ESC;
|
||||
tmpByte = SS3Desig.getBytes();
|
||||
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
|
||||
index = tmpByte.length+1;
|
||||
}
|
||||
ebyte[index++] = ISO_ESC;
|
||||
ebyte[index++] = ISO_SS3_7;
|
||||
ebyte[index++] = (byte)(convByte[2] & 0x7f);
|
||||
ebyte[index++] = (byte)(convByte[3] & 0x7f);
|
||||
}
|
||||
ebyte[index++] = ISO_ESC;
|
||||
ebyte[index++] = ISO_SS2_7;
|
||||
ebyte[index++] = (byte)(convByte[2] & 0x7f);
|
||||
ebyte[index++] = (byte)(convByte[3] & 0x7f);
|
||||
}
|
||||
if((convByte[0] == SS2)&&(convByte[1] == 0xA3))
|
||||
{
|
||||
if(!SS3DesDefined){
|
||||
newSS3DesDefined = true;
|
||||
ebyte[0] = ISO_ESC;
|
||||
tmpByte = SS3Desig.getBytes();
|
||||
System.arraycopy(tmpByte, 0, ebyte, 1, tmpByte.length);
|
||||
index = tmpByte.length+1;
|
||||
}
|
||||
ebyte[index++] = ISO_ESC;
|
||||
ebyte[index++] = ISO_SS3_7;
|
||||
ebyte[index++] = (byte)(convByte[2] & 0x7f);
|
||||
ebyte[index++] = (byte)(convByte[3] & 0x7f);
|
||||
}
|
||||
}
|
||||
return index;
|
||||
|
||||
@ -105,17 +105,19 @@ public class ISO2022_CN
|
||||
private char cnsDecode(byte byte1, byte byte2, byte SS) {
|
||||
byte1 |= MSB;
|
||||
byte2 |= MSB;
|
||||
if (SS == ISO_SS2_7) {
|
||||
return cnsDecoder.convToUnicode(byte1, byte2,
|
||||
cnsDecoder.unicodeCNS2);
|
||||
|
||||
} else { //SS == ISO_SS3_7
|
||||
char[] outSurr = cnsDecoder.convToSurrogate(byte1, byte2,
|
||||
cnsDecoder.unicodeCNS3);
|
||||
if (outSurr == null || outSurr[0] != '\u0000')
|
||||
return REPLACE_CHAR;
|
||||
return outSurr[1];
|
||||
}
|
||||
int p = 0;
|
||||
if (SS == ISO_SS2_7)
|
||||
p = 1; //plane 2, index -- 1
|
||||
else if (SS == ISO_SS3_7)
|
||||
p = 2; //plane 3, index -- 2
|
||||
else
|
||||
return REPLACE_CHAR; //never happen.
|
||||
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
|
||||
byte2 & 0xff,
|
||||
p);
|
||||
if (ret == null || ret.length == 2)
|
||||
return REPLACE_CHAR;
|
||||
return ret[0];
|
||||
}
|
||||
|
||||
private char SODecode(byte byte1, byte byte2, byte SOD) {
|
||||
@ -125,9 +127,12 @@ public class ISO2022_CN
|
||||
return gb2312Decoder.decodeDouble(byte1 & 0xff,
|
||||
byte2 & 0xff);
|
||||
} else { // SOD == SODesigCNS
|
||||
return cnsDecoder.convToUnicode(byte1,
|
||||
byte2,
|
||||
cnsDecoder.unicodeCNS1);
|
||||
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
|
||||
byte2 & 0xff,
|
||||
0);
|
||||
if (ret == null)
|
||||
return REPLACE_CHAR;
|
||||
return ret[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
|
||||
* Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -66,16 +66,19 @@ public abstract class X11CNS11643 extends Charset {
|
||||
super(cs);
|
||||
this.plane = plane;
|
||||
}
|
||||
|
||||
private byte[] bb = new byte[4];
|
||||
public boolean canEncode(char c) {
|
||||
if (c <= 0x7F) {
|
||||
return false;
|
||||
}
|
||||
int p = getNative(c) >> 16;
|
||||
if (p == 1 && plane == 0 ||
|
||||
p == 2 && plane == 2 ||
|
||||
p == 3 && plane == 3)
|
||||
return true;
|
||||
return false;
|
||||
int nb = toEUC(c, bb);
|
||||
if (nb == -1)
|
||||
return false;
|
||||
int p = 0;
|
||||
if (nb == 4)
|
||||
p = (bb[1] & 0xff) - 0xa0;
|
||||
return (p == plane);
|
||||
}
|
||||
|
||||
public boolean isLegalReplacement(byte[] repl) {
|
||||
@ -93,19 +96,26 @@ public abstract class X11CNS11643 extends Charset {
|
||||
try {
|
||||
while (sp < sl) {
|
||||
char c = sa[sp];
|
||||
if (c >= '\uFFFE' || c <= '\u007f')
|
||||
return CoderResult.unmappableForLength(1);
|
||||
int cns = getNative(c);
|
||||
int p = cns >> 16;
|
||||
if (p == 1 && plane == 0 ||
|
||||
p == 2 && plane == 2 ||
|
||||
p == 3 && plane == 3) {
|
||||
if (dl - dp < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
da[dp++] = (byte) ((cns >> 8) & 0x7f);
|
||||
da[dp++] = (byte) (cns & 0x7f);
|
||||
sp++;
|
||||
continue;
|
||||
if ( c > '\u007f'&& c < '\uFFFE') {
|
||||
int nb = toEUC(c, bb);
|
||||
if (nb != -1) {
|
||||
int p = 0;
|
||||
if (nb == 4)
|
||||
p = (bb[1] & 0xff) - 0xa0;
|
||||
if (p == plane) {
|
||||
if (dl - dp < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
if (nb == 2) {
|
||||
da[dp++] = (byte)(bb[0] & 0x7f);
|
||||
da[dp++] = (byte)(bb[1] & 0x7f);
|
||||
} else {
|
||||
da[dp++] = (byte)(bb[2] & 0x7f);
|
||||
da[dp++] = (byte)(bb[3] & 0x7f);
|
||||
}
|
||||
sp++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
return CoderResult.unmappableForLength(1);
|
||||
}
|
||||
@ -118,23 +128,17 @@ public abstract class X11CNS11643 extends Charset {
|
||||
}
|
||||
|
||||
private class Decoder extends EUC_TW.Decoder {
|
||||
int plane;
|
||||
private String table;
|
||||
protected Decoder(Charset cs, int plane) {
|
||||
super(cs);
|
||||
switch (plane) {
|
||||
case 0:
|
||||
table = unicodeCNS1;
|
||||
break;
|
||||
case 2:
|
||||
table = unicodeCNS2;
|
||||
break;
|
||||
case 3:
|
||||
table = unicodeCNS3;
|
||||
break;
|
||||
default:
|
||||
if (plane == 0)
|
||||
this.plane = plane;
|
||||
else if (plane == 2 || plane == 3)
|
||||
this.plane = plane - 1;
|
||||
else
|
||||
throw new IllegalArgumentException
|
||||
("Only planes 1, 2, and 3 supported");
|
||||
}
|
||||
}
|
||||
|
||||
//we only work on array backed buffer.
|
||||
@ -142,33 +146,26 @@ public abstract class X11CNS11643 extends Charset {
|
||||
byte[] sa = src.array();
|
||||
int sp = src.arrayOffset() + src.position();
|
||||
int sl = src.arrayOffset() + src.limit();
|
||||
assert (sp <= sl);
|
||||
sp = (sp <= sl ? sp : sl);
|
||||
|
||||
char[] da = dst.array();
|
||||
int dp = dst.arrayOffset() + dst.position();
|
||||
int dl = dst.arrayOffset() + dst.limit();
|
||||
assert (dp <= dl);
|
||||
dp = (dp <= dl ? dp : dl);
|
||||
|
||||
try {
|
||||
while (sp < sl) {
|
||||
if ( sl - sp < 2) {
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
byte b1 = sa[sp];
|
||||
byte b2 = sa[sp + 1];
|
||||
char c = convToUnicode((byte)(b1 | 0x80),
|
||||
(byte)(b2 | 0x80),
|
||||
table);
|
||||
if (c == replacement().charAt(0)
|
||||
//to keep the compatibility with b2cX11CNS11643
|
||||
/*|| c == '\u0000'*/) {
|
||||
int b1 = (sa[sp] & 0xff) | 0x80;
|
||||
int b2 = (sa[sp + 1] & 0xff) | 0x80;
|
||||
char[] cc = toUnicode(b1, b2, plane);
|
||||
// plane3 has non-bmp characters(added), x11cnsp3
|
||||
// however does not support them
|
||||
if (cc == null || cc.length == 2)
|
||||
return CoderResult.unmappableForLength(2);
|
||||
}
|
||||
if (dl - dp < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
da[dp++] = c;
|
||||
da[dp++] = cc[0];
|
||||
sp +=2;
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
|
||||
@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/* @test
|
||||
@bug 4779029 4924625 6392664
|
||||
@bug 4779029 4924625 6392664 6730652
|
||||
@summary Test decoding of various permutations of valid ISO-2022-CN byte sequences
|
||||
*/
|
||||
|
||||
@ -387,6 +387,12 @@ public class TestISO2022CNDecoder
|
||||
};
|
||||
private static CoderResult test15_result = CoderResult.unmappableForLength(4);
|
||||
|
||||
private static boolean encodeTest6730652 () throws Exception {
|
||||
//sample p3 codepoints
|
||||
String strCNSP3 = "\u4e28\u4e36\u4e3f\u4e85\u4e05\u4e04\u5369\u53b6\u4e2a\u4e87\u4e49\u51e2\u56b8\u56b9\u56c4\u8053\u92b0";
|
||||
return strCNSP3.equals(new String(strCNSP3.getBytes("x-ISO-2022-CN-CNS"), "x-ISO-2022-CN-CNS"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Main program to test ISO2022CN conformance
|
||||
*
|
||||
@ -430,6 +436,9 @@ public class TestISO2022CNDecoder
|
||||
pass &= decodeTest(test13_bytes, test13_chars, "escapes13");
|
||||
pass &= decodeResultTest(test14_bytes, test14_result, "escapes14");
|
||||
pass &= decodeResultTest(test15_bytes, test15_result, "escapes15");
|
||||
|
||||
pass &= encodeTest6730652 ();
|
||||
|
||||
// PASS/FAIL status is what the whole thing is about.
|
||||
//
|
||||
if (! pass) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user