8027607: (rb) Provide UTF-8 based properties resource bundles

Reviewed-by: okutsu, sherman
This commit is contained in:
Naoto Sato 2015-07-29 13:36:53 -07:00
parent 520ba7be6c
commit 0dfdc19faa
4 changed files with 369 additions and 10 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,8 +40,17 @@
package java.util;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException;
import java.security.AccessController;
import java.util.Locale;
import sun.security.action.GetPropertyAction;
import sun.util.PropertyResourceBundleCharset;
import sun.util.ResourceBundleEnumeration;
/**
@ -108,11 +117,20 @@ import sun.util.ResourceBundleEnumeration;
* <strong>Note:</strong> PropertyResourceBundle can be constructed either
* from an InputStream or a Reader, which represents a property file.
* Constructing a PropertyResourceBundle instance from an InputStream requires
* that the input stream be encoded in ISO-8859-1. In that case, characters
* that cannot be represented in ISO-8859-1 encoding must be represented by Unicode Escapes
* as defined in section 3.3 of
* <cite>The Java&trade; Language Specification</cite>
* that the input stream be encoded in UTF-8. By default, if a
* {@link java.nio.charset.MalformedInputException} or an
* {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
* input stream, then the PropertyResourceBundle instance resets to the state
* before the exception, re-reads the input stream in {@code ISO-8859-1}, and
* continues reading. If the system property
* {@code java.util.PropertyResourceBundle.encoding} is set to either
* "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
* and throws the exception if it encounters an invalid sequence.
* If "ISO-8859-1" is specified, characters that cannot be represented in
* ISO-8859-1 encoding must be represented by Unicode Escapes as defined in section
* 3.3 of <cite>The Java&trade; Language Specification</cite>
* whereas the other constructor which takes a Reader does not have that limitation.
* Other encoding values are ignored for this system property.
*
* @see ResourceBundle
* @see ListResourceBundle
@ -120,10 +138,26 @@ import sun.util.ResourceBundleEnumeration;
* @since 1.1
*/
public class PropertyResourceBundle extends ResourceBundle {
// Check whether the strict encoding is specified.
// The possible encoding is either "ISO-8859-1" or "UTF-8".
private static final String encoding =
AccessController.doPrivileged(
new GetPropertyAction("java.util.PropertyResourceBundle.encoding", ""))
.toUpperCase(Locale.ROOT);
/**
* Creates a property resource bundle from an {@link java.io.InputStream
* InputStream}. The property file read with this constructor
* must be encoded in ISO-8859-1.
* InputStream}. This constructor reads the property file in UTF-8 by default.
* If a {@link java.nio.charset.MalformedInputException} or an
* {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
* input stream, then the PropertyResourceBundle instance resets to the state
* before the exception, re-reads the input stream in {@code ISO-8859-1} and
* continues reading. If the system property
* {@code java.util.PropertyResourceBundle.encoding} is set to either
* "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
* and throws the exception if it encounters an invalid sequence. Other
* encoding values are ignored for this system property.
*
* @param stream an InputStream that represents a property file
* to read from.
@ -131,12 +165,19 @@ public class PropertyResourceBundle extends ResourceBundle {
* @throws NullPointerException if <code>stream</code> is null
* @throws IllegalArgumentException if {@code stream} contains a
* malformed Unicode escape sequence.
* @throws MalformedInputException if the system property
* {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
* and {@code stream} contains an invalid UTF-8 byte sequence.
* @throws UnmappableCharacterException if the system property
* {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
* and {@code stream} contains an unmappable UTF-8 byte sequence.
*/
@SuppressWarnings({"unchecked", "rawtypes"})
public PropertyResourceBundle (InputStream stream) throws IOException {
// NOTE(review): the next three statements look like the pre-change body that
// the diff view kept next to the new delegating call below - confirm against
// the actual post-commit file before relying on them.
Properties properties = new Properties();
properties.load(stream);
lookup = new HashMap(properties);
// Delegate to another constructor of this class, wrapping the stream in an
// InputStreamReader whose decoder depends on the "encoding" setting:
//  - "ISO-8859-1": a plain ISO-8859-1 decoder;
//  - anything else: a PropertyResourceBundleCharset decoder, created in
//    strict mode only when the setting is exactly "UTF-8".
this(new InputStreamReader(stream,
"ISO-8859-1".equals(encoding) ?
StandardCharsets.ISO_8859_1.newDecoder() :
new PropertyResourceBundleCharset("UTF-8".equals(encoding)).newDecoder()));
}
/**

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package sun.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import sun.util.logging.PlatformLogger;
/**
 * A Charset implementation for reading PropertyResourceBundle, in order
 * for loading properties files. This first tries to load the properties
 * file with UTF-8 encoding. If that fails, the file is loaded with
 * ISO-8859-1 instead, unless strict UTF-8 decoding was requested, in which
 * case the UTF-8 decoding error is reported to the caller.
 *
 * <p>Only decoding is supported; {@link #newEncoder()} always throws.
 */
public class PropertyResourceBundleCharset extends Charset {

    /** When {@code true}, UTF-8 decoding errors are reported instead of falling back. */
    private final boolean strictUTF8;

    /**
     * Creates the charset under this class's canonical name, with no aliases.
     *
     * @param strictUTF8 {@code true} to report malformed or unmappable UTF-8
     *        input instead of falling back to ISO-8859-1
     */
    public PropertyResourceBundleCharset(boolean strictUTF8) {
        super(PropertyResourceBundleCharset.class.getCanonicalName(), null);
        this.strictUTF8 = strictUTF8;
    }

    /**
     * Creates a non-strict charset with the given name and aliases.
     *
     * @param canonicalName the canonical name passed on to {@link Charset}
     * @param aliases the aliases passed on to {@link Charset}; may be null
     */
    public PropertyResourceBundleCharset(String canonicalName, String[] aliases) {
        super(canonicalName, aliases);
        this.strictUTF8 = false;
    }

    /** This charset never reports that it contains another charset. */
    @Override
    public boolean contains(Charset cs) {
        return false;
    }

    /** Returns a new decoder that tries UTF-8 first (see the class description). */
    @Override
    public CharsetDecoder newDecoder() {
        return new PropertiesFileDecoder(this, strictUTF8);
    }

    /**
     * Encoding is not supported by this charset.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public CharsetEncoder newEncoder() {
        throw new UnsupportedOperationException("Encoding is not supported");
    }

    /**
     * Decoder that delegates to a reporting UTF-8 decoder until the first
     * malformed or unmappable sequence and, unless strict, delegates to an
     * ISO-8859-1 decoder from then on.
     */
    private static final class PropertiesFileDecoder extends CharsetDecoder {

        /** UTF-8 decoder configured to report, not replace, bad input. */
        private final CharsetDecoder cdUTF_8 = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);

        /** Fallback decoder; stays null until the first UTF-8 decoding error. */
        private CharsetDecoder cdISO_8859_1 = null;

        /** Copy of the owning charset's strictness flag. */
        private final boolean strictUTF8;

        PropertiesFileDecoder(Charset cs, boolean strictUTF8) {
            // Average and maximum chars-per-byte are both 1.0.
            super(cs, 1.0f, 1.0f);
            this.strictUTF8 = strictUTF8;
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            // Once the fallback has been selected it is used for all later input.
            if (cdISO_8859_1 != null) {
                return cdISO_8859_1.decode(in, out, false);
            }

            // Remember both positions so this call's work can be undone.
            in.mark();
            out.mark();

            CoderResult cr = cdUTF_8.decode(in, out, false);
            if (cr.isUnderflow() || cr.isOverflow() || strictUTF8) {
                // Either no error occurred, or errors must be reported as-is.
                return cr;
            }

            // Malformed or unmappable UTF-8: rewind to the positions marked at
            // the start of this call and decode the same bytes as ISO-8859-1.
            in.reset();
            out.reset();
            PlatformLogger.getLogger(getClass().getCanonicalName()).info(
                    "Invalid or unmappable UTF-8 sequence detected. " +
                    "Switching encoding from UTF-8 to ISO-8859-1");
            cdISO_8859_1 = StandardCharsets.ISO_8859_1.newDecoder();
            return cdISO_8859_1.decode(in, out, false);
        }
    }
}

View File

@ -0,0 +1,155 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8027607
* @summary Test that UTF-8 based properties files can be loaded successfully.
* @run main CodePointTest
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=ISO-8859-1 CodePointTest
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 CodePointTest
*/
import java.io.*;
import java.nio.charset.*;
import java.nio.file.*;
import java.util.*;
import static java.util.ResourceBundle.Control;
import java.util.stream.*;
/*
 * Dumps every legal character in ISO-8859-1/UTF-8/US-ASCII into
 * a <CharSet>.properties file. Each entry has the form
 * "keyXXXX=c", where "XXXX" is a code point (variable length)
 * and "c" is the character encoded in the passed character set.
 * Then, loads it with ResourceBundle.Control.newBundle() and compares both
 * contents. This confirms the following two functions:
 * - For UTF-8.properties, UTF-8 code points are loaded correctly
 * - For ISO-8859-1.properties, UTF-8->ISO-8859-1 fallback works
 *
 * Does the same test with "java.util.PropertyResourceBundle.encoding"
 * set to "ISO-8859-1", and confirms that only the UTF-8 properties file
 * loads with different contents. With the property set to "UTF-8",
 * confirms that loading the ISO-8859-1 properties file throws.
 */
public class CodePointTest {
    /** Charsets for which a properties file is generated and loaded back. */
    static final Charset[] props = {StandardCharsets.ISO_8859_1,
                                    StandardCharsets.UTF_8,
                                    StandardCharsets.US_ASCII};

    /** Value of the strict-encoding system property, or "" when unset. */
    static final String encoding =
        System.getProperty("java.util.PropertyResourceBundle.encoding", "");

    public static void main(String[] args) {
        for (Charset cs : props) {
            // Reading the ISO-8859-1 file in strict UTF-8 mode must throw.
            boolean expectDecodeError =
                cs == StandardCharsets.ISO_8859_1 && encoding.equals("UTF-8");
            // Reading the UTF-8 file as ISO-8859-1 must yield different contents.
            boolean expectMismatch =
                cs == StandardCharsets.UTF_8 && encoding.equals("ISO-8859-1");
            try {
                checkProps(cs, expectMismatch);
                if (expectDecodeError) {
                    // should not happen
                    throw new RuntimeException("Reading ISO-8859-1 properties in "+
                        "strict UTF-8 encoding should throw an exception");
                }
            } catch (IOException e) {
                boolean decodeError = e instanceof MalformedInputException ||
                                      e instanceof UnmappableCharacterException;
                if (!(decodeError && expectDecodeError)) {
                    throw new RuntimeException(e);
                }
                // Expected exception is correctly detected.
            }
        }
    }

    /**
     * Writes one "key&lt;hex&gt;=&lt;char&gt;" entry per legal code point of
     * {@code cs} into "&lt;cs&gt;.properties" under the "test.classes"
     * directory (default "."), loads that file back through
     * ResourceBundle.Control, and compares the two.
     *
     * @param cs one of ISO-8859-1, UTF-8 or US-ASCII
     * @param shouldFail {@code true} if the loaded contents are expected to
     *        differ from what was written
     * @throws IOException if loading the bundle fails, e.g. on a decoding error
     * @throws IllegalArgumentException if {@code cs} is not a supported charset
     * @throws RuntimeException if the comparison result contradicts
     *         {@code shouldFail}, or the file cannot be written or found
     */
    static void checkProps(Charset cs, boolean shouldFail) throws IOException {
        final int end;
        switch (cs.name()) {
        case "ISO-8859-1":
            end = 0xff;
            break;
        case "UTF-8":
            end = Character.MAX_CODE_POINT;
            break;
        case "US-ASCII":
            end = 0x7f;
            break;
        default:
            // Fail loudly even when assertions are disabled.
            throw new IllegalArgumentException("Unsupported charset: " + cs);
        }

        Properties p = new Properties();
        String outputName = cs.name() + ".properties";

        // Forget previous test artifacts
        ResourceBundle.clearCache();

        IntStream.rangeClosed(Character.MIN_CODE_POINT, end)
            .filter(c -> Character.isDefined(c) &&
                         (Character.isSupplementaryCodePoint(c) ||
                          !Character.isSurrogate((char)c)))
            .forEach(c -> p.setProperty("key"+Integer.toHexString(c),
                                        String.valueOf(Character.toChars(c))));

        try (BufferedWriter bw = Files.newBufferedWriter(
                 Paths.get(System.getProperty("test.classes", "."), outputName),
                 cs)) {
            p.store(bw, null);
        } catch (IOException ex) {
            // Wrapped so a write failure is never mistaken for a decoding
            // error raised while loading.
            throw new RuntimeException(ex);
        }

        // try loading it
        Control c = Control.getControl(Control.FORMAT_PROPERTIES);
        ResourceBundle rb;
        try {
            rb = c.newBundle(cs.name(), Locale.ROOT, "java.properties",
                             CodePointTest.class.getClassLoader(), false);
        } catch (IllegalAccessException |
                 InstantiationException ex) {
            throw new RuntimeException(ex);
        }
        if (rb == null) {
            throw new RuntimeException(outputName +
                " was not found through the class loader");
        }

        Properties result = new Properties();
        for (String key : rb.keySet()) {
            result.setProperty(key, rb.getString(key));
        }

        boolean matches = p.equals(result);
        if (shouldFail) {
            // A mismatch is required here, not merely tolerated.
            if (matches) {
                throw new RuntimeException("Charset: "+cs+
                    ": contents were expected to differ, but they are equal");
            }
            return;
        }

        if (!matches) {
            System.out.println("Charset: "+cs);
            // Report over the union of keys so entries missing on either
            // side are shown too, and compare null-safely.
            Set<String> keys = new TreeSet<>(p.stringPropertyNames());
            keys.addAll(rb.keySet());
            for (String key : keys) {
                String expected = p.getProperty(key);
                String actual = result.getProperty(key);
                if (!Objects.equals(expected, actual)) {
                    System.out.println(key+": file: "+expected+", RB: "+actual);
                }
            }
            throw new RuntimeException("not equal!");
        }
    }
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8027607
* @summary Test whether illegal UTF-8 sequences are handled correctly.
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 IllegalSequenceTest
*/
import java.io.*;
import java.nio.charset.*;
import java.util.*;
/**
 * Feeds byte sequences that are illegal in UTF-8 to
 * {@code PropertyResourceBundle(InputStream)} and requires each one to be
 * rejected with a MalformedInputException or UnmappableCharacterException.
 */
public class IllegalSequenceTest {
    static final byte[][] illegalSequences = {
        {(byte)0xc0, (byte)0xaf}, // non-shortest UTF-8
        {(byte)0xc2, (byte)0xe0}, // consecutive leading bytes
        {(byte)0xc2, (byte)0x80, (byte)0x80}, // two byte leading + 2 trailing
        {(byte)0xe0, (byte)0x80}, // three byte leading + 1 trailing
        {(byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80}, // 0x110000 (over U+10FFFF)
    };

    /**
     * @throws IOException if reading fails for a reason other than the two
     *         expected decoding exceptions
     * @throws RuntimeException if a sequence is accepted without an exception
     */
    public static void main(String[] args) throws IOException {
        for (byte[] illegalSeq : illegalSequences) {
            boolean rejected = false;
            try (InputStream is = new ByteArrayInputStream(illegalSeq)) {
                // Constructing the bundle is the step under test. No lookup
                // follows, because a lookup on an accepted bundle would throw
                // MissingResourceException and hide the diagnostic below.
                new PropertyResourceBundle(is);
            } catch (MalformedInputException |
                     UnmappableCharacterException e) {
                // success
                rejected = true;
            }
            if (!rejected) {
                throw new RuntimeException(
                    "Expected exception was not thrown for byte sequence "
                    + toHex(illegalSeq));
            }
        }
    }

    /** Formats bytes as space-separated two-digit hex, e.g. "c0 af". */
    private static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }
}