mirror of
https://github.com/openjdk/jdk.git
synced 2026-06-06 18:53:37 +00:00
8385736: Optimize ListFormat custom pattern parsing
Reviewed-by: jlu
This commit is contained in:
parent
6253a03af3
commit
4192f9bf2e
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,7 +32,6 @@ import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
import sun.util.locale.provider.LocaleProviderAdapter;
|
||||
|
||||
@ -112,6 +111,7 @@ public final class ListFormat extends Format {
|
||||
private static final int TWO = 3;
|
||||
private static final int THREE = 4;
|
||||
private static final int PATTERN_ARRAY_LENGTH = THREE + 1;
|
||||
private static final int PLACEHOLDER_LENGTH = 3; // i.e., "{i}".length()
|
||||
|
||||
/**
|
||||
* The locale to use for formatting list patterns.
|
||||
@ -126,14 +126,11 @@ public final class ListFormat extends Format {
|
||||
*/
|
||||
private final String[] patterns;
|
||||
|
||||
private static final Pattern PARSE_START = Pattern.compile("(.*?)\\{0}(.*?)\\{1}");
|
||||
private static final Pattern PARSE_MIDDLE = Pattern.compile("\\{0}(.*?)\\{1}");
|
||||
private static final Pattern PARSE_END = Pattern.compile("\\{0}(.*?)\\{1}(.*?)");
|
||||
private static final Pattern PARSE_TWO = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)");
|
||||
private static final Pattern PARSE_THREE = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)\\{2}(.*?)");
|
||||
private transient Pattern startPattern;
|
||||
private transient String startBefore;
|
||||
private transient String startBetween;
|
||||
private transient String middleBetween;
|
||||
private transient Pattern endPattern;
|
||||
private transient String endBetween;
|
||||
private transient String endAfter;
|
||||
|
||||
private ListFormat(Locale l, String[] patterns) {
|
||||
locale = l;
|
||||
@ -149,50 +146,60 @@ public final class ListFormat extends Format {
|
||||
}
|
||||
}
|
||||
|
||||
// get pattern strings
|
||||
var m = PARSE_START.matcher(patterns[START]);
|
||||
String startBefore;
|
||||
String startBetween;
|
||||
if (m.matches()) {
|
||||
startBefore = m.group(1);
|
||||
startBetween = m.group(2);
|
||||
// Get pattern strings. Pattern conditions from LDML are:
|
||||
// - it contains the placeholders {0}, {1}, and {2} ("3"-pattern only) in order
|
||||
// - "start" and "middle" patterns end with the {1} placeholder
|
||||
// - "middle" and "end" patterns begin with the {0} placeholder
|
||||
var pattern = patterns[START];
|
||||
var placeholderPositions = findPlaceholders(pattern);
|
||||
if (placeholderPositions != null &&
|
||||
placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) {
|
||||
startBefore = pattern.substring(0, placeholderPositions[0]);
|
||||
startBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
|
||||
placeholderPositions[1]);
|
||||
} else {
|
||||
throw new IllegalArgumentException("start pattern is incorrect: " + patterns[START]);
|
||||
throw new IllegalArgumentException("start pattern is incorrect: " + pattern);
|
||||
}
|
||||
m = PARSE_MIDDLE.matcher(patterns[MIDDLE]);
|
||||
if (m.matches()) {
|
||||
middleBetween = m.group(1);
|
||||
|
||||
pattern = patterns[MIDDLE];
|
||||
placeholderPositions = findPlaceholders(pattern);
|
||||
if (placeholderPositions != null &&
|
||||
placeholderPositions[0] == 0 &&
|
||||
placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) {
|
||||
middleBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
|
||||
placeholderPositions[1]);
|
||||
} else {
|
||||
throw new IllegalArgumentException("middle pattern is incorrect: " + patterns[MIDDLE]);
|
||||
throw new IllegalArgumentException("middle pattern is incorrect: " + pattern);
|
||||
}
|
||||
m = PARSE_END.matcher(patterns[END]);
|
||||
String endBetween;
|
||||
String endAfter;
|
||||
if (m.matches()) {
|
||||
endBetween = m.group(1);
|
||||
endAfter = m.group(2);
|
||||
|
||||
pattern = patterns[END];
|
||||
placeholderPositions = findPlaceholders(pattern);
|
||||
if (placeholderPositions != null && placeholderPositions[0] == 0) {
|
||||
endBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
|
||||
placeholderPositions[1]);
|
||||
endAfter = pattern.substring(placeholderPositions[1] + PLACEHOLDER_LENGTH);
|
||||
} else {
|
||||
throw new IllegalArgumentException("end pattern is incorrect: " + patterns[END]);
|
||||
throw new IllegalArgumentException("end pattern is incorrect: " + pattern);
|
||||
}
|
||||
|
||||
// Validate two/three patterns, if given. Otherwise, generate them
|
||||
if (!patterns[TWO].isEmpty()) {
|
||||
if (!PARSE_TWO.matcher(patterns[TWO]).matches()) {
|
||||
throw new IllegalArgumentException("pattern for two is incorrect: " + patterns[TWO]);
|
||||
pattern = patterns[TWO];
|
||||
if (!pattern.isEmpty()) {
|
||||
if (findPlaceholders(pattern) == null) {
|
||||
throw new IllegalArgumentException("pattern for two is incorrect: " + pattern);
|
||||
}
|
||||
} else {
|
||||
patterns[TWO] = startBefore + "{0}" + endBetween + "{1}" + endAfter;
|
||||
}
|
||||
if (!patterns[THREE].isEmpty()) {
|
||||
if (!PARSE_THREE.matcher(patterns[THREE]).matches()) {
|
||||
throw new IllegalArgumentException("pattern for three is incorrect: " + patterns[THREE]);
|
||||
pattern = patterns[THREE];
|
||||
if (!pattern.isEmpty()) {
|
||||
placeholderPositions = findPlaceholders(pattern);
|
||||
if (placeholderPositions == null || placeholderPositions[2] == -1) {
|
||||
throw new IllegalArgumentException("pattern for three is incorrect: " + pattern);
|
||||
}
|
||||
} else {
|
||||
patterns[THREE] = startBefore + "{0}" + startBetween + "{1}" + endBetween + "{2}" + endAfter;
|
||||
}
|
||||
|
||||
startPattern = Pattern.compile(startBefore + "(.+?)" + startBetween);
|
||||
endPattern = Pattern.compile(endBetween + "(.+?)" + endAfter);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -455,21 +462,29 @@ public final class ListFormat extends Format {
|
||||
public Object parseObject(String source, ParsePosition parsePos) {
|
||||
Objects.requireNonNull(source);
|
||||
Objects.requireNonNull(parsePos);
|
||||
var sm = startPattern.matcher(source);
|
||||
var em = endPattern.matcher(source);
|
||||
var startPattern = findPattern(source, parsePos.getIndex(), startBefore, startBetween);
|
||||
var endPattern = findPattern(source, parsePos.getIndex(), endBetween, endAfter);
|
||||
Object parsed = null;
|
||||
if (sm.find(parsePos.getIndex()) && em.find(parsePos.getIndex())) {
|
||||
// get em to the last
|
||||
var c = em.start();
|
||||
while (em.find()) {
|
||||
c = em.start();
|
||||
if (startPattern != null && endPattern != null) {
|
||||
// get endPattern to the last
|
||||
var ep = endPattern;
|
||||
while ((ep = findPattern(source, ep[1], endBetween, endAfter)) != null) {
|
||||
endPattern = ep;
|
||||
}
|
||||
em.find(c);
|
||||
var startEnd = sm.end();
|
||||
var endStart = em.start();
|
||||
|
||||
var startEnd = startPattern[1];
|
||||
var endStart = endPattern[0];
|
||||
if (startEnd <= endStart) {
|
||||
var mid = source.substring(startEnd, endStart);
|
||||
var count = mid.split(middleBetween).length + 2;
|
||||
var count = 3;
|
||||
var mbLength = middleBetween.length();
|
||||
if (mbLength > 0) {
|
||||
var midIndex = 0;
|
||||
while ((midIndex = mid.indexOf(middleBetween, midIndex)) >= 0) {
|
||||
count++;
|
||||
midIndex += mbLength;
|
||||
}
|
||||
}
|
||||
parsed = new MessageFormat(createMessageFormatString(count), locale).parseObject(source, parsePos);
|
||||
}
|
||||
}
|
||||
@ -565,7 +580,9 @@ public final class ListFormat extends Format {
|
||||
private String createMessageFormatString(int count) {
|
||||
var sb = new StringBuilder(256).append(patterns[START]);
|
||||
IntStream.range(2, count - 1).forEach(i -> sb.append(middleBetween).append("{").append(i).append("}"));
|
||||
sb.append(patterns[END].replaceFirst("\\{0}", "").replaceFirst("\\{1}", "\\{" + (count - 1) + "\\}"));
|
||||
sb.append(endBetween)
|
||||
.append("{").append(count - 1).append("}")
|
||||
.append(endAfter);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@ -643,4 +660,51 @@ public final class ListFormat extends Format {
|
||||
*/
|
||||
NARROW
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return the positions of the "{0}", "{1}", and "{2}" placeholders in the
|
||||
* given pattern string, or null if the pattern is invalid}
|
||||
*
|
||||
* The returned array contains -1 for "{2}" if that placeholder is absent.
|
||||
*
|
||||
* @param pattern pattern string to parse
|
||||
*/
|
||||
private static int[] findPlaceholders(String pattern) {
|
||||
var positions = new int[3];
|
||||
for (int i = 0; i < positions.length; i++) {
|
||||
positions[i] = pattern.indexOf("{" + i + "}");
|
||||
}
|
||||
|
||||
// Check the existence and order of the placeholders
|
||||
if (positions[0] == -1 ||
|
||||
positions[1] == -1 ||
|
||||
positions[0] + PLACEHOLDER_LENGTH > positions[1] ||
|
||||
positions[2] != -1 && positions[1] + PLACEHOLDER_LENGTH > positions[2]) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return positions;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return the start and end positions of the first pattern found in
|
||||
* the given {@code source} starting at {@code pos}, or null if no such
|
||||
* pattern exists}
|
||||
*
|
||||
* The pattern must contain at least one character between the
|
||||
* {@code prefix} and {@code suffix} strings. The returned end position is
|
||||
* exclusive.
|
||||
*
|
||||
* @param source string to search
|
||||
* @param pos position at which to start the search
|
||||
* @param prefix starting string within the pattern
|
||||
* @param suffix ending string within the pattern
|
||||
*/
|
||||
private static int[] findPattern(String source, int pos, String prefix, String suffix) {
|
||||
var prefixPos = source.indexOf(prefix, pos);
|
||||
var suffixPos = prefixPos != -1 ? source.indexOf(suffix, prefixPos + prefix.length() + 1) : -1;
|
||||
|
||||
return prefixPos < suffixPos ?
|
||||
new int[] {prefixPos, suffixPos + suffix.length()} : null;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8041488 8316974 8318569 8306116
|
||||
* @bug 8041488 8316974 8318569 8306116 8385736
|
||||
* @summary Tests for ListFormat class
|
||||
* @run junit TestListFormat
|
||||
*/
|
||||
@ -66,6 +66,14 @@ public class TestListFormat {
|
||||
"",
|
||||
"",
|
||||
};
|
||||
// Ensures regex metacharacters in custom patterns are treated as literals.
|
||||
private static final String[] CUSTOM_PATTERNS_METACHAR = {
|
||||
". {0} * {1}",
|
||||
"{0} + {1}",
|
||||
"{0} | {1} [",
|
||||
"",
|
||||
"",
|
||||
};
|
||||
private static final String[] CUSTOM_PATTERNS_IAE_START = {
|
||||
"{0}",
|
||||
"{0} mid {1}",
|
||||
@ -120,7 +128,7 @@ public class TestListFormat {
|
||||
assertEquals(ListFormat.getInstance(), ListFormat.getInstance(Locale.getDefault(Locale.Category.FORMAT), ListFormat.Type.STANDARD, ListFormat.Style.FULL));
|
||||
}
|
||||
|
||||
static Arguments[] getInstance_1Arg() {
|
||||
private static Arguments[] getInstance_1Arg() {
|
||||
return new Arguments[] {
|
||||
arguments(CUSTOM_PATTERNS_FULL, SAMPLE1, "foo"),
|
||||
arguments(CUSTOM_PATTERNS_FULL, SAMPLE2, "twobef foo two bar twoaft"),
|
||||
@ -130,10 +138,14 @@ public class TestListFormat {
|
||||
arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2, "sbef foo ebet bar eaft"),
|
||||
arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3, "sbef foo sbet bar ebet baz eaft"),
|
||||
arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
|
||||
arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE1, "foo"),
|
||||
arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE2, ". foo | bar ["),
|
||||
arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE3, ". foo * bar | baz ["),
|
||||
arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE4, ". foo * bar + baz | qux ["),
|
||||
};
|
||||
}
|
||||
|
||||
static Arguments[] getInstance_1Arg_IAE() {
|
||||
private static Arguments[] getInstance_1Arg_IAE() {
|
||||
return new Arguments[] {
|
||||
arguments(new String[1], "Pattern array length should be 5"),
|
||||
arguments(new String[6], "Pattern array length should be 5"),
|
||||
@ -146,7 +158,7 @@ public class TestListFormat {
|
||||
};
|
||||
}
|
||||
|
||||
static Arguments[] getInstance_3Arg() {
|
||||
private static Arguments[] getInstance_3Arg() {
|
||||
return new Arguments[] {
|
||||
arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
|
||||
"foo, bar, and baz", true),
|
||||
@ -188,7 +200,7 @@ public class TestListFormat {
|
||||
};
|
||||
}
|
||||
|
||||
static Arguments[] parseObject_parsePos() {
|
||||
private static Arguments[] parseObject_parsePos() {
|
||||
return new Arguments[] {
|
||||
arguments(CUSTOM_PATTERNS_FULL, SAMPLE1),
|
||||
arguments(CUSTOM_PATTERNS_FULL, SAMPLE2),
|
||||
@ -201,7 +213,7 @@ public class TestListFormat {
|
||||
};
|
||||
}
|
||||
|
||||
static Arguments[] getInstance_3Arg_InheritPatterns() {
|
||||
private static Arguments[] getInstance_3Arg_InheritPatterns() {
|
||||
return new Arguments[] {
|
||||
arguments(ListFormat.Type.STANDARD, ListFormat.Style.FULL),
|
||||
arguments(ListFormat.Type.STANDARD, ListFormat.Style.SHORT),
|
||||
@ -215,7 +227,7 @@ public class TestListFormat {
|
||||
};
|
||||
}
|
||||
|
||||
static Arguments[] getLocale_localeDependent() {
|
||||
private static Arguments[] getLocale_localeDependent() {
|
||||
return new Arguments[] {
|
||||
arguments(Locale.ROOT),
|
||||
arguments(Locale.US),
|
||||
@ -225,6 +237,16 @@ public class TestListFormat {
|
||||
};
|
||||
}
|
||||
|
||||
private static Arguments[] getInstance_1Arg_InvalidLongPattern() {
|
||||
return new Arguments[] {
|
||||
arguments(0, "start pattern is incorrect:"),
|
||||
arguments(1, "middle pattern is incorrect:"),
|
||||
arguments(2, "end pattern is incorrect:"),
|
||||
arguments(3, "pattern for two is incorrect:"),
|
||||
arguments(4, "pattern for three is incorrect:"),
|
||||
};
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource
|
||||
void getInstance_1Arg(String[] patterns, List<String> input, String expected) throws ParseException {
|
||||
@ -240,6 +262,25 @@ public class TestListFormat {
|
||||
assertEquals(errorMsg, ex.getMessage());
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource
|
||||
void getInstance_1Arg_InvalidLongPattern(int index, String expected) {
|
||||
var patterns = new String[]{
|
||||
"{0}, {1}",
|
||||
"{0}, {1}",
|
||||
"{0}, and {1}",
|
||||
"{0} and {1}",
|
||||
"{0} {1} {2}"
|
||||
};
|
||||
patterns[index] = "{0}".repeat(100_000);
|
||||
|
||||
// Ensures validation of invalid long patterns completes without timing out
|
||||
var msg = assertThrows(IllegalArgumentException.class,
|
||||
() -> ListFormat.getInstance(patterns))
|
||||
.getMessage();
|
||||
assertEquals(expected, msg.substring(0, Math.min(msg.length(), expected.length())));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource
|
||||
void getInstance_3Arg(Locale l, ListFormat.Type type, ListFormat.Style style, String expected, boolean roundTrip) throws ParseException {
|
||||
@ -348,6 +389,7 @@ public class TestListFormat {
|
||||
// should be inherited from parent locales.
|
||||
Locale.availableLocales().forEach(l -> ListFormat.getInstance(l, type, style));
|
||||
}
|
||||
|
||||
@Test
|
||||
void getInstance_3Arg_InheritanceValidation() {
|
||||
// Tests if inheritance works as expected.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user