mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-01 03:30:34 +00:00
8027607: (rb) Provide UTF-8 based properties resource bundles
Reviewed-by: okutsu, sherman
This commit is contained in:
parent
520ba7be6c
commit
0dfdc19faa
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1996, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -40,8 +40,17 @@
|
||||
package java.util;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.UnmappableCharacterException;
|
||||
import java.security.AccessController;
|
||||
import java.util.Locale;
|
||||
import sun.security.action.GetPropertyAction;
|
||||
import sun.util.PropertyResourceBundleCharset;
|
||||
import sun.util.ResourceBundleEnumeration;
|
||||
|
||||
/**
|
||||
@ -108,11 +117,20 @@ import sun.util.ResourceBundleEnumeration;
|
||||
* <strong>Note:</strong> PropertyResourceBundle can be constructed either
|
||||
* from an InputStream or a Reader, which represents a property file.
|
||||
* Constructing a PropertyResourceBundle instance from an InputStream requires
|
||||
* that the input stream be encoded in ISO-8859-1. In that case, characters
|
||||
* that cannot be represented in ISO-8859-1 encoding must be represented by Unicode Escapes
|
||||
* as defined in section 3.3 of
|
||||
* <cite>The Java™ Language Specification</cite>
|
||||
* that the input stream be encoded in UTF-8. By default, if a
|
||||
* {@link java.nio.charset.MalformedInputException} or an
|
||||
* {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
|
||||
* input stream, then the PropertyResourceBundle instance resets to the state
|
||||
* before the exception, re-reads the input stream in {@code ISO-8859-1}, and
|
||||
* continues reading. If the system property
|
||||
* {@code java.util.PropertyResourceBundle.encoding} is set to either
|
||||
* "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
|
||||
* and throws the exception if it encounters an invalid sequence.
|
||||
* If "ISO-8859-1" is specified, characters that cannot be represented in
|
||||
* ISO-8859-1 encoding must be represented by Unicode Escapes as defined in section
|
||||
* 3.3 of <cite>The Java™ Language Specification</cite>
|
||||
* whereas the other constructor which takes a Reader does not have that limitation.
|
||||
* Other encoding values are ignored for this system property.
|
||||
*
|
||||
* @see ResourceBundle
|
||||
* @see ListResourceBundle
|
||||
@ -120,10 +138,26 @@ import sun.util.ResourceBundleEnumeration;
|
||||
* @since 1.1
|
||||
*/
|
||||
public class PropertyResourceBundle extends ResourceBundle {
|
||||
|
||||
// Check whether the strict encoding is specified.
|
||||
// The possible encoding is either "ISO-8859-1" or "UTF-8".
|
||||
private static final String encoding =
|
||||
AccessController.doPrivileged(
|
||||
new GetPropertyAction("java.util.PropertyResourceBundle.encoding", ""))
|
||||
.toUpperCase(Locale.ROOT);
|
||||
|
||||
/**
|
||||
* Creates a property resource bundle from an {@link java.io.InputStream
|
||||
* InputStream}. The property file read with this constructor
|
||||
* must be encoded in ISO-8859-1.
|
||||
* InputStream}. This constructor reads the property file in UTF-8 by default.
|
||||
* If a {@link java.nio.charset.MalformedInputException} or an
|
||||
* {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
|
||||
* input stream, then the PropertyResourceBundle instance resets to the state
|
||||
* before the exception, re-reads the input stream in {@code ISO-8859-1} and
|
||||
* continues reading. If the system property
|
||||
* {@code java.util.PropertyResourceBundle.encoding} is set to either
|
||||
* "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
|
||||
* and throws the exception if it encounters an invalid sequence. Other
|
||||
* encoding values are ignored for this system property.
|
||||
*
|
||||
* @param stream an InputStream that represents a property file
|
||||
* to read from.
|
||||
@ -131,12 +165,19 @@ public class PropertyResourceBundle extends ResourceBundle {
|
||||
* @throws NullPointerException if <code>stream</code> is null
|
||||
* @throws IllegalArgumentException if {@code stream} contains a
|
||||
* malformed Unicode escape sequence.
|
||||
* @throws MalformedInputException if the system property
|
||||
* {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
|
||||
* and {@code stream} contains an invalid UTF-8 byte sequence.
|
||||
* @throws UnmappableCharacterException if the system property
|
||||
* {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
|
||||
* and {@code stream} contains an unmappable UTF-8 byte sequence.
|
||||
*/
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
public PropertyResourceBundle (InputStream stream) throws IOException {
|
||||
Properties properties = new Properties();
|
||||
properties.load(stream);
|
||||
lookup = new HashMap(properties);
|
||||
this(new InputStreamReader(stream,
|
||||
"ISO-8859-1".equals(encoding) ?
|
||||
StandardCharsets.ISO_8859_1.newDecoder() :
|
||||
new PropertyResourceBundleCharset("UTF-8".equals(encoding)).newDecoder()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package sun.util;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Objects;
|
||||
import sun.util.logging.PlatformLogger;
|
||||
|
||||
/**
|
||||
* A Charset implementation for reading PropertyResourceBundle, in order
|
||||
* for loading properties files. This first tries to load the properties
|
||||
* file with UTF-8 encoding). If it fails, then load the file with ISO-8859-1
|
||||
*/
|
||||
public class PropertyResourceBundleCharset extends Charset {
|
||||
|
||||
private boolean strictUTF8 = false;
|
||||
|
||||
public PropertyResourceBundleCharset(boolean strictUTF8) {
|
||||
this(PropertyResourceBundleCharset.class.getCanonicalName(), null);
|
||||
this.strictUTF8 = strictUTF8;
|
||||
}
|
||||
|
||||
public PropertyResourceBundleCharset(String canonicalName, String[] aliases) {
|
||||
super(canonicalName, aliases);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new PropertiesFileDecoder(this, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
throw new UnsupportedOperationException("Encoding is not supported");
|
||||
}
|
||||
|
||||
private final class PropertiesFileDecoder extends CharsetDecoder {
|
||||
|
||||
private CharsetDecoder cdUTF_8 = StandardCharsets.UTF_8.newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
private CharsetDecoder cdISO_8859_1 = null;
|
||||
|
||||
protected PropertiesFileDecoder(Charset cs,
|
||||
float averageCharsPerByte, float maxCharsPerByte) {
|
||||
super(cs, averageCharsPerByte, maxCharsPerByte);
|
||||
}
|
||||
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
if (Objects.nonNull(cdISO_8859_1)) {
|
||||
return cdISO_8859_1.decode(in, out, false);
|
||||
}
|
||||
in.mark();
|
||||
out.mark();
|
||||
|
||||
CoderResult cr = cdUTF_8.decode(in, out, false);
|
||||
if (cr.isUnderflow() || cr.isOverflow() ||
|
||||
PropertyResourceBundleCharset.this.strictUTF8) {
|
||||
return cr;
|
||||
}
|
||||
|
||||
in.reset();
|
||||
out.reset();
|
||||
|
||||
PlatformLogger.getLogger(getClass().getCanonicalName()).info(
|
||||
"Invalid or unmappable UTF-8 sequence detected. " +
|
||||
"Switching encoding from UTF-8 to ISO-8859-1");
|
||||
cdISO_8859_1 = StandardCharsets.ISO_8859_1.newDecoder();
|
||||
return cdISO_8859_1.decode(in, out, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
/*
|
||||
* @test
|
||||
* @bug 8027607
|
||||
* @summary Test UTF-8 based properties files can be loaded successfully.
|
||||
* @run main CodePointTest
|
||||
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=ISO-8859-1 CodePointTest
|
||||
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 CodePointTest
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.*;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
import static java.util.ResourceBundle.Control;
|
||||
import java.util.stream.*;
|
||||
|
||||
/*
|
||||
* Dumps every legal character in ISO-8859-1/UTF-8 into
|
||||
* a <CharSet>.properties file. Each entry has a form of
|
||||
* "keyXXXX=c", where "XXXX" is a code point (variable length)
|
||||
* and "c" is the character encoded in the passed character set.
|
||||
* Then, load it with ResourceBundle.Control.newBundle() and compare both
|
||||
* contents. This confirms the following two functions:
|
||||
* - For UTF-8.properties, UTF-8 code points are loaded correctly
|
||||
* - For ISO-8859-1.properties, UTF-8->ISO-8859-1 fallback works
|
||||
*
|
||||
* Does the same test with "java.util.PropertyResourceBundle.encoding" set
|
||||
* to "ISO-8859-1", and confirms only UTF-8 properties loading fails.
|
||||
*/
|
||||
/**
 * Round-trip test for properties-based resource bundles: writes a
 * {@code <charset>.properties} file for each charset in {@link #props},
 * loads it back through {@code ResourceBundle.Control.newBundle()} and
 * compares the loaded entries with the ones that were written.
 */
public class CodePointTest {
    /** Charsets for which a properties file is written and reloaded. */
    static final Charset[] props = {StandardCharsets.ISO_8859_1,
                                    StandardCharsets.UTF_8,
                                    StandardCharsets.US_ASCII};

    /** Value of the strict-encoding system property, or "" when it is unset. */
    static final String encoding =
        System.getProperty("java.util.PropertyResourceBundle.encoding", "");

    /**
     * Runs {@link #checkProps} for every charset in {@link #props}.
     *
     * @param args ignored
     * @throws RuntimeException if a charset does not behave as expected for
     *         the current value of {@link #encoding}
     */
    public static void main(String[] args) {
        for (Charset cs : props) {
            // ISO-8859-1 content read in strict UTF-8 mode must be rejected
            // with a decoding exception.
            boolean expectDecodingError =
                cs == StandardCharsets.ISO_8859_1 && encoding.equals("UTF-8");
            // UTF-8 content read in strict ISO-8859-1 mode loads without an
            // exception, but the loaded values must differ.
            boolean expectMismatch =
                cs == StandardCharsets.UTF_8 && encoding.equals("ISO-8859-1");
            try {
                checkProps(cs, expectMismatch);

                if (expectDecodingError) {
                    // should not happen
                    throw new RuntimeException("Reading ISO-8859-1 properties in "+
                        "strict UTF-8 encoding should throw an exception");
                }
            } catch (IOException e) {
                boolean decodingError = e instanceof MalformedInputException ||
                                        e instanceof UnmappableCharacterException;
                if (!(decodingError && expectDecodingError)) {
                    throw new RuntimeException(e);
                }
                // Expected exception is correctly detected.
            }
        }
    }

    /**
     * Writes every defined, non-surrogate code point supported by {@code cs}
     * to {@code <cs.name()>.properties} under the directory named by the
     * {@code test.classes} system property (default "."), loads that file as
     * a resource bundle and compares the result with what was written.
     *
     * @param cs one of ISO-8859-1, UTF-8 or US-ASCII
     * @param shouldFail {@code true} if the loaded contents are expected to
     *        differ from the written ones
     * @throws IOException if loading the bundle fails with an I/O error
     * @throws IllegalArgumentException if {@code cs} is not supported
     * @throws RuntimeException if the file cannot be written or found, or if
     *         the comparison outcome does not match {@code shouldFail}
     */
    static void checkProps(Charset cs, boolean shouldFail) throws IOException {
        final int end;
        switch (cs.name()) {
        case "ISO-8859-1":
            end = 0xff;
            break;
        case "UTF-8":
            end = Character.MAX_CODE_POINT;
            break;
        case "US-ASCII":
            end = 0x7f;
            break;
        default:
            // Fail loudly even when assertions are disabled.
            throw new IllegalArgumentException("Unsupported charset: " + cs);
        }

        Properties p = new Properties();
        String outputName = cs.name() + ".properties";

        // Forget previous test artifacts
        ResourceBundle.clearCache();

        for (int c = Character.MIN_CODE_POINT; c <= end; c++) {
            boolean surrogate =
                c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE;
            if (Character.isDefined(c) && !surrogate) {
                p.setProperty("key" + Integer.toHexString(c),
                              new String(Character.toChars(c)));
            }
        }

        // A write failure is wrapped so that it cannot be mistaken by main()
        // for the decoding exception expected while loading.
        try (BufferedWriter bw = Files.newBufferedWriter(
                 FileSystems.getDefault().getPath(System.getProperty("test.classes", "."),
                 outputName), cs)) {
            p.store(bw, null);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }

        // try loading it
        Control c = Control.getControl(Control.FORMAT_PROPERTIES);
        ResourceBundle rb;
        try {
            rb = c.newBundle(cs.name(), Locale.ROOT, "java.properties",
                        CodePointTest.class.getClassLoader(), false);
        } catch (IllegalAccessException |
                 InstantiationException ex) {
            throw new RuntimeException(ex);
        }
        if (rb == null) {
            // newBundle() returns null when the resource cannot be found.
            throw new RuntimeException(outputName +
                " was not found by the class loader");
        }

        Properties result = new Properties();
        for (String key : rb.keySet()) {
            result.setProperty(key, rb.getString(key));
        }

        boolean same = p.equals(result);
        if (same && shouldFail) {
            throw new RuntimeException(outputName +
                " was expected to load incorrectly, but its contents were read back intact");
        }
        if (!same && !shouldFail) {
            System.out.println("Charset: "+cs);
            // Report over the union of both key sets so that entries missing
            // on either side are listed instead of causing an NPE.
            Set<String> keys = new TreeSet<>(p.stringPropertyNames());
            keys.addAll(result.stringPropertyNames());
            for (String key : keys) {
                String written = p.getProperty(key);
                String loaded = result.getProperty(key);
                if (!Objects.equals(written, loaded)) {
                    System.out.println(key+": file: "+written+", RB: "+loaded);
                }
            }
            throw new RuntimeException("not equal!");
        }
    }
}
|
||||
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
/*
|
||||
* @test
|
||||
* @bug 8027607
|
||||
* @summary Test whether illegal UTF-8 sequences are handled correctly.
|
||||
* @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 IllegalSequenceTest
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Feeds byte sequences that are not valid UTF-8 to
 * {@link PropertyResourceBundle#PropertyResourceBundle(InputStream)} and
 * expects each of them to be rejected with a decoding exception. The jtreg
 * header runs this test with
 * {@code java.util.PropertyResourceBundle.encoding} set to "UTF-8".
 */
public class IllegalSequenceTest {
    /** Inputs that must be rejected; each row is one complete stream. */
    static final byte[][] illegalSequences = {
        {(byte)0xc0, (byte)0xaf}, // non-shortest UTF-8
        {(byte)0xc2, (byte)0xe0}, // consecutive leading bytes
        {(byte)0xc2, (byte)0x80, (byte)0x80}, // two byte leading + 2 trailing
        {(byte)0xe0, (byte)0x80}, // three byte leading + 1 trailing
        {(byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80}, // 0x110000 (over U+10FFFF)
    };

    /**
     * Constructs a bundle from every entry of {@link #illegalSequences}.
     *
     * @param args ignored
     * @throws IOException if reading fails with an I/O error other than the
     *         expected decoding exceptions
     * @throws RuntimeException if a sequence is accepted without an exception
     */
    public static void main(String[] args) throws IOException {
        for (byte[] illegalSeq : illegalSequences) {
            try (InputStream is = new ByteArrayInputStream(illegalSeq)) {
                // The constructor itself must throw. No lookup is done
                // afterwards: a MissingResourceException from a lookup would
                // hide the fact that the invalid input was accepted.
                new PropertyResourceBundle(is);
            } catch (MalformedInputException |
                     UnmappableCharacterException e) {
                // success
                continue;
            }
            throw new RuntimeException(
                "Expected exception was not thrown for byte sequence: " +
                toHex(illegalSeq));
        }
    }

    /** Formats bytes as space-separated two-digit hex values, e.g. "c0 af". */
    private static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user