8032842: Locale.filterTags()/lookupTag() methods return lowercased language tags

8175539: Duplicate matching tags returned by Locale.filterTags() for LanguageRange("*")

Reviewed-by: naoto
This commit is contained in:
Nishit Jain 2017-07-07 12:19:00 +05:30 committed by Nishit Jain
parent 0bdd7c8d4d
commit 17d94f696c
4 changed files with 197 additions and 30 deletions

View File

@ -3257,6 +3257,9 @@ public final class Locale implements Cloneable, Serializable {
* Returns a list of matching {@code Locale} instances using the filtering
* mechanism defined in RFC 4647.
*
* This filter operation on the given {@code locales} ensures that only
* unique matching locale(s) are returned.
*
* @param priorityList user's Language Priority List in which each language
* tag is sorted in descending order based on priority or weight
* @param locales {@code Locale} instances used for matching
@ -3284,6 +3287,9 @@ public final class Locale implements Cloneable, Serializable {
* {@link #filter(List, Collection, FilteringMode)} when {@code mode} is
* {@link FilteringMode#AUTOSELECT_FILTERING}.
*
* This filter operation on the given {@code locales} ensures that only
* unique matching locale(s) are returned.
*
* @param priorityList user's Language Priority List in which each language
* tag is sorted in descending order based on priority or weight
* @param locales {@code Locale} instances used for matching
@ -3304,6 +3310,17 @@ public final class Locale implements Cloneable, Serializable {
* Returns a list of matching language tags using the basic filtering
* mechanism defined in RFC 4647.
*
* This filter operation on the given {@code tags} ensures that only
* unique matching tag(s) are returned with preserved case. In case of
* duplicate matching tags with the case difference, the first matching
* tag with preserved case is returned.
* For example, "de-ch" is returned out of the duplicate matching tags
* "de-ch" and "de-CH", if "de-ch" is checked first for matching in the
* given {@code tags}. Note that if the given {@code tags} is an unordered
* {@code Collection}, the returned matching tag out of duplicate tags is
* subject to change, depending on the implementation of the
* {@code Collection}.
*
* @param priorityList user's Language Priority List in which each language
* tag is sorted in descending order based on priority or weight
* @param tags language tags
@ -3331,6 +3348,17 @@ public final class Locale implements Cloneable, Serializable {
* {@link #filterTags(List, Collection, FilteringMode)} when {@code mode}
* is {@link FilteringMode#AUTOSELECT_FILTERING}.
*
* This filter operation on the given {@code tags} ensures that only
* unique matching tag(s) are returned with preserved case. In case of
* duplicate matching tags with the case difference, the first matching
* tag with preserved case is returned.
* For example, "de-ch" is returned out of the duplicate matching tags
* "de-ch" and "de-CH", if "de-ch" is checked first for matching in the
* given {@code tags}. Note that if the given {@code tags} is an unordered
* {@code Collection}, the returned matching tag out of duplicate tags is
* subject to change, depending on the implementation of the
* {@code Collection}.
*
* @param priorityList user's Language Priority List in which each language
* tag is sorted in descending order based on priority or weight
* @param tags language tags
@ -3370,6 +3398,9 @@ public final class Locale implements Cloneable, Serializable {
* Returns the best-matching language tag using the lookup mechanism
* defined in RFC 4647.
*
* This lookup operation on the given {@code tags} ensures that the
* first matching tag with preserved case is returned.
*
* @param priorityList user's Language Priority List in which each language
* tag is sorted in descending order based on priority or weight
* @param tags language tags used for matching

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,6 +34,9 @@ import java.util.Locale.*;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
/**
* Implementation for BCP47 Locale matching
@ -126,12 +129,16 @@ public final class LocaleMatcher {
return new ArrayList<String>(tags);
} else {
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (tag.startsWith(range)) {
// change to lowercase for case-insensitive matching
String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
if (lowerCaseTag.startsWith(range)) {
int len = range.length();
if ((tag.length() == len || tag.charAt(len) == '-')
&& !list.contains(tag)
&& !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
if ((lowerCaseTag.length() == len
|| lowerCaseTag.charAt(len) == '-')
&& !caseInsensitiveMatch(list, lowerCaseTag)
&& !shouldIgnoreFilterBasicMatch(zeroRanges,
lowerCaseTag)) {
// preserving the case of the input tag
list.add(tag);
}
}
@ -152,20 +159,43 @@ public final class LocaleMatcher {
private static Collection<String> removeTagsMatchingBasicZeroRange(
List<LanguageRange> zeroRange, Collection<String> tags) {
if (zeroRange.isEmpty()) {
tags = removeDuplicates(tags);
return tags;
}
List<String> matchingTags = new ArrayList<>();
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
matchingTags.add(tag);
// change to lowercase for case-insensitive matching
String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterBasicMatch(zeroRange, lowerCaseTag)
&& !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
matchingTags.add(tag); // preserving the case of the input tag
}
}
return matchingTags;
}
/**
 * Removes duplicate tags from the given {@code tags}, ignoring case
 * considerations. The first occurrence of each tag (in the iteration
 * order of {@code tags}) is kept with its original case; later tags that
 * differ from an already kept tag only by case are dropped.
 *
 * @param tags the language tags to de-duplicate
 * @return a new list holding the distinct tags in encounter order
 */
private static Collection<String> removeDuplicates(
        Collection<String> tags) {
    // The sorted set is used purely as a case-insensitive "seen" filter;
    // the result list is what preserves the encounter order.
    Set<String> seenTags = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
    List<String> distinctTags = new ArrayList<>();
    // An explicit loop is used instead of a stream so that no stateful,
    // side-effecting predicate has to be passed to Stream.filter().
    for (String tag : tags) {
        if (seenTags.add(tag)) {
            distinctTags.add(tag);
        }
    }
    return distinctTags;
}
/**
 * Tells whether the given {@code list} already holds an element that is
 * equal to the given {@code tag} when case is ignored.
 *
 * @param list the tags collected so far
 * @param tag the tag to look for
 * @return {@code true} if some element of {@code list} equals {@code tag}
 *         ignoring case, {@code false} otherwise
 */
private static boolean caseInsensitiveMatch(List<String> list, String tag) {
    for (String candidate : list) {
        if (candidate.equalsIgnoreCase(tag)) {
            return true;
        }
    }
    return false;
}
/**
* The tag which is falling in the basic exclusion range(s) should not
* be considered as the matching tag. Ignores the tag matching with the
@ -216,8 +246,9 @@ public final class LocaleMatcher {
}
String[] rangeSubtags = range.split("-");
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
String[] tagSubtags = tag.split("-");
// change to lowercase for case-insensitive matching
String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
String[] tagSubtags = lowerCaseTag.split("-");
if (!rangeSubtags[0].equals(tagSubtags[0])
&& !rangeSubtags[0].equals("*")) {
continue;
@ -225,9 +256,11 @@ public final class LocaleMatcher {
int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
tagSubtags);
if (rangeSubtags.length == rangeIndex && !list.contains(tag)
&& !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
list.add(tag);
if (rangeSubtags.length == rangeIndex
&& !caseInsensitiveMatch(list, lowerCaseTag)
&& !shouldIgnoreFilterExtendedMatch(zeroRanges,
lowerCaseTag)) {
list.add(tag); // preserve the case of the input tag
}
}
}
@ -245,14 +278,17 @@ public final class LocaleMatcher {
private static Collection<String> removeTagsMatchingExtendedZeroRange(
List<LanguageRange> zeroRange, Collection<String> tags) {
if (zeroRange.isEmpty()) {
tags = removeDuplicates(tags);
return tags;
}
List<String> matchingTags = new ArrayList<>();
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
matchingTags.add(tag);
// change to lowercase for case-insensitive matching
String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterExtendedMatch(zeroRange, lowerCaseTag)
&& !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
matchingTags.add(tag); // preserve the case of the input tag
}
}
@ -368,10 +404,11 @@ public final class LocaleMatcher {
String rangeForRegex = range.replace("*", "\\p{Alnum}*");
while (rangeForRegex.length() > 0) {
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (tag.matches(rangeForRegex)
&& !shouldIgnoreLookupMatch(zeroRanges, tag)) {
return tag;
// change to lowercase for case-insensitive matching
String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
if (lowerCaseTag.matches(rangeForRegex)
&& !shouldIgnoreLookupMatch(zeroRanges, lowerCaseTag)) {
return tag; // preserve the case of the input tag
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@
/*
* @test
* @bug 7069824 8042360
* @bug 7069824 8042360 8032842 8175539
* @summary Verify implementation for Locale matching.
* @run main Bug7069824
*/
@ -747,7 +747,7 @@ public class Bug7069824 {
priorityList = LanguageRange.parse(ranges);
tagList = generateLanguageTags(tags);
actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList));
expectedTags = "ja-jp-hepburn, en";
expectedTags = "ja-JP-hepburn, en";
if (!expectedTags.equals(actualTags)) {
error = true;
@ -763,7 +763,7 @@ public class Bug7069824 {
priorityList = LanguageRange.parse(ranges);
tagList = generateLanguageTags(tags);
actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
expectedTags = "de-de, de-de-x-goethe";
expectedTags = "de-DE, de-DE-x-goethe";
if (!expectedTags.equals(actualTags)) {
error = true;
@ -779,8 +779,8 @@ public class Bug7069824 {
priorityList = LanguageRange.parse(ranges);
tagList = generateLanguageTags(tags);
actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
expectedTags = "de-de, de-latn-de, de-latf-de, de-de-x-goethe, "
+ "de-latn-de-1996, de-deva-de";
expectedTags = "de-DE, de-Latn-DE, de-Latf-DE, de-DE-x-goethe, "
+ "de-Latn-DE-1996, de-Deva-DE";
if (!expectedTags.equals(actualTags)) {
error = true;
@ -796,8 +796,8 @@ public class Bug7069824 {
priorityList = LanguageRange.parse(ranges);
tagList = generateLanguageTags(tags);
actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
expectedTags = "de-de, de-latn-de, de-latf-de, de-de-x-goethe, "
+ "de-latn-de-1996, de-deva-de";
expectedTags = "de-DE, de-Latn-DE, de-Latf-DE, de-DE-x-goethe, "
+ "de-Latn-DE-1996, de-Deva-DE";
if (!expectedTags.equals(actualTags)) {
error = true;
@ -884,7 +884,7 @@ public class Bug7069824 {
priorityList = LanguageRange.parse(ranges);
tagList = generateLanguageTags(tags);
actualTag = Locale.lookupTag(priorityList, tagList);
expectedTag = "fr-jp";
expectedTag = "fr-JP";
if (!expectedTag.equals(actualTag)) {
error = true;

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8032842 8175539
* @summary Checks that the filterTags() and lookupTag() methods
* preserve the case of matching language tag(s).
* Before the 8032842 fix, these methods returned the matching
* language tag(s) in lowercase.
* Also, checks the filterTags() to return only unique
* (ignoring case considerations) matching tags.
*
*/
import java.util.List;
import java.util.Locale;
import java.util.Locale.FilteringMode;
import java.util.Locale.LanguageRange;
import java.util.Objects;
/**
 * Regression test for 8032842 and 8175539: verifies that
 * {@code Locale.filterTags()} and {@code Locale.lookupTag()} return the
 * matching tags with the case of the input preserved, and that
 * {@code filterTags()} returns each matching tag only once when the
 * input contains tags differing only by case.
 */
public class Bug8032842 {

    public static void main(String[] args) {

        // test filterBasic() for preserving the case of matching tags for
        // the language range '*', with no duplicates in the matching tags
        testFilter("*", List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP",
                "JA-JP", "en-GB"),
                List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP", "JA-JP"),
                FilteringMode.AUTOSELECT_FILTERING);

        // test filterBasic() for preserving the case of matching tags for
        // basic ranges other than *, with no duplicates in the matching tags
        testFilter("mtm-RU, en-GB", List.of("En-Gb", "mTm-RU", "en-US",
                "en-latn", "en-GB"),
                List.of("mTm-RU", "En-Gb"), FilteringMode.AUTOSELECT_FILTERING);

        // test filterExtended() for preserving the case of matching tags for
        // the language range '*', with no duplicates in the matching tags
        testFilter("*", List.of("de-CH", "hi-in", "En-GB", "hi-IN",
                "ja-Latn-JP", "JA-JP"),
                List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP", "JA-JP"),
                FilteringMode.EXTENDED_FILTERING);

        // test filterExtended() for preserving the case of matching tags for
        // extended ranges other than *, with no duplicates in the matching tags
        testFilter("*-ch;q=0.5, *-Latn;q=0.4", List.of("fr-CH", "de-Ch",
                "en-latn", "en-US", "en-Latn"),
                List.of("fr-CH", "de-Ch", "en-latn"),
                FilteringMode.EXTENDED_FILTERING);

        // test lookupTag() for preserving the case of matching tag
        testLookup("*-ch;q=0.5", List.of("en", "fR-cH"), "fR-cH");
    }

    /**
     * Filters {@code tags} against the parsed {@code ranges} using the given
     * {@code mode} and fails if the result differs from {@code expected}.
     *
     * @param ranges   language ranges, in the syntax accepted by
     *                 {@code LanguageRange.parse(String)}
     * @param tags     the candidate language tags
     * @param expected the exact list of tags expected back, in order
     * @param mode     the filtering mode to use
     * @throws RuntimeException if the actual result is not equal to
     *                          {@code expected}
     */
    public static void testFilter(String ranges, List<String> tags,
            List<String> expected, FilteringMode mode) {
        List<LanguageRange> priorityList = LanguageRange.parse(ranges);
        List<String> actual = Locale.filterTags(priorityList, tags, mode);
        if (!actual.equals(expected)) {
            throw new RuntimeException("[filterTags() failed for the language"
                    + " range: " + ranges + ", Expected: " + expected
                    + ", Found: " + actual + "]");
        }
    }

    /**
     * Looks up the best matching tag in {@code tags} for the parsed
     * {@code ranges} and fails if it differs from {@code expected}.
     *
     * @param ranges   language ranges, in the syntax accepted by
     *                 {@code LanguageRange.parse(String)}
     * @param tags     the candidate language tags
     * @param expected the tag expected back
     * @throws RuntimeException if the actual result is not equal to
     *                          {@code expected}
     */
    public static void testLookup(String ranges, List<String> tags,
            String expected) {
        List<LanguageRange> priorityList = LanguageRange.parse(ranges);
        String actual = Locale.lookupTag(priorityList, tags);
        // lookupTag() returns null when nothing matches; compare null-safely
        // so that case is reported through the descriptive failure below
        // instead of a NullPointerException.
        if (!Objects.equals(actual, expected)) {
            throw new RuntimeException("[lookupTag() failed for the language"
                    + " range: " + ranges + ", Expected: " + expected
                    + ", Found: " + actual + "]");
        }
    }
}