8202329: [AIX] Fix codepage mappings for IBM-943 and Big5

Reviewed-by: simonis, stuefe
This commit is contained in:
Bhaktavatsal R Maram 2018-06-12 13:00:50 +05:30 committed by Volker Simonis
parent 4f38d4ff72
commit dc7d03e19d
2 changed files with 67 additions and 2 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -297,7 +297,23 @@ static int ParseLocale(JNIEnv* env, int cat, char ** std_language, char ** std_s
if (strcmp(p, "EUC-JP") == 0) {
*std_encoding = "EUC-JP-LINUX";
}
#else
#endif
#ifdef _AIX
if (strcmp(p, "big5") == 0) {
/* On AIX Traditional Chinese Big5 codeset is mapped to IBM-950 */
*std_encoding = "IBM-950";
} else if (strcmp(p, "IBM-943") == 0) {
/*
* On AIX, IBM-943 is mapped to IBM-943C in which symbol 'yen' and
* 'overline' are replaced with 'backslash' and 'tilde' from ASCII
* making first 96 code points same as ASCII.
*/
*std_encoding = "IBM-943C";
}
#endif
#ifdef __solaris__
if (strcmp(p,"eucJP") == 0) {
/* For Solaris use customized vendor defined character
* customized EUC-JP converter

View File

@ -30,6 +30,7 @@
import java.io.*;
import java.nio.*;
import java.nio.charset.*;
import java.util.Arrays;
public class TestIBMBugs {
@ -173,6 +174,53 @@ public class TestIBMBugs {
}
}
/**
 * Regression test for JDK-8202329: checks how the IBM943 and IBM943C
 * charsets treat the four characters backslash (U+005C), tilde (U+007E),
 * yen sign (U+00A5) and overline (U+203E).
 *
 * <ul>
 *   <li>IBM943 is expected to encode backslash and tilde as the
 *       substitution byte 0x3f, and yen and overline as 0x5c and 0x7e;
 *       decoding those bytes is expected to yield
 *       "??" followed by yen and overline.</li>
 *   <li>IBM943C is expected to encode backslash and yen as 0x5c, and
 *       tilde and overline as 0x7e; decoding 0x5c and 0x7e is expected
 *       to yield the ASCII backslash and tilde.</li>
 * </ul>
 *
 * @throws Exception if either charset encodes or decodes differently
 *         from the expectations above
 */
private static void bug8202329() throws Exception {
    // [backslash][tilde][yen][overline]
    String original = "\\\u007E\u00A5\u203E";

    // IBM943: backslash and tilde are unmappable (0x3f); yen and overline
    // occupy 0x5c and 0x7e.
    verifyCodec8202329("IBM943", original,
            new byte[] {0x3f, 0x3f, 0x5c, 0x7e},
            "??\u00A5\u203E");

    // IBM943C: 0x5c and 0x7e are ASCII backslash and tilde; yen and
    // overline are folded onto those same two bytes when encoding.
    verifyCodec8202329("IBM943C", original,
            new byte[] {0x5c, 0x7e, 0x5c, 0x7e},
            "\\~\\~");
}

/**
 * Encodes {@code input} with the named charset and compares the result with
 * {@code expectedBytes}, then decodes {@code expectedBytes} with the same
 * charset and compares the result with {@code expectedDecoded}.
 *
 * @param charsetName     name passed to {@link Charset#forName(String)}
 * @param input           string to encode
 * @param expectedBytes   bytes the encoder must produce; also the decoder input
 * @param expectedDecoded string the decoder must produce from {@code expectedBytes}
 * @throws Exception if the encoded bytes or the decoded string do not match
 */
private static void verifyCodec8202329(String charsetName, String input,
        byte[] expectedBytes, String expectedDecoded) throws Exception {
    Charset charset = Charset.forName(charsetName);

    ByteBuffer encoded = charset.encode(input);
    byte[] actualBytes = new byte[encoded.remaining()];
    encoded.get(actualBytes);
    if (!Arrays.equals(actualBytes, expectedBytes)) {
        throw new Exception(charsetName + " failed to encode"
                + ": expected " + Arrays.toString(expectedBytes)
                + ", got " + Arrays.toString(actualBytes));
    }

    String actualDecoded = charset.decode(ByteBuffer.wrap(expectedBytes)).toString();
    if (!actualDecoded.equals(expectedDecoded)) {
        throw new Exception(charsetName + " failed to decode"
                + ": expected " + hexCodeUnits8202329(expectedDecoded)
                + ", got " + hexCodeUnits8202329(actualDecoded));
    }
}

/**
 * Renders each UTF-16 code unit of {@code s} as {@code U+XXXX} so that a
 * failure message stays readable regardless of the log's encoding.
 */
private static String hexCodeUnits8202329(String s) {
    StringBuilder sb = new StringBuilder("[");
    for (int i = 0; i < s.length(); i++) {
        if (i > 0) {
            sb.append(' ');
        }
        sb.append(String.format("U+%04X", (int) s.charAt(i)));
    }
    return sb.append(']').toString();
}
public static void main (String[] args) throws Exception {
bug6577466();
// need to be tested before any other IBM949C test case
@ -183,5 +231,6 @@ public class TestIBMBugs {
bug6371619();
bug6371431();
bug6569191();
bug8202329();
}
}