From 4192f9bf2edb14e6449e922a8bb27ec14c1003d4 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Tue, 2 Jun 2026 20:27:12 +0000 Subject: [PATCH] 8385736: Optimize ListFormat custom pattern parsing Reviewed-by: jlu --- .../share/classes/java/text/ListFormat.java | 162 ++++++++++++------ .../Format/ListFormat/TestListFormat.java | 58 ++++++- 2 files changed, 163 insertions(+), 57 deletions(-) diff --git a/src/java.base/share/classes/java/text/ListFormat.java b/src/java.base/share/classes/java/text/ListFormat.java index 5cd0e9e3651..3f320bbcc9b 100644 --- a/src/java.base/share/classes/java/text/ListFormat.java +++ b/src/java.base/share/classes/java/text/ListFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,6 @@ import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.Objects; -import java.util.regex.Pattern; import java.util.stream.IntStream; import sun.util.locale.provider.LocaleProviderAdapter; @@ -112,6 +111,7 @@ public final class ListFormat extends Format { private static final int TWO = 3; private static final int THREE = 4; private static final int PATTERN_ARRAY_LENGTH = THREE + 1; + private static final int PLACEHOLDER_LENGTH = 3; // i.e., "{i}".length() /** * The locale to use for formatting list patterns. @@ -126,14 +126,11 @@ public final class ListFormat extends Format { */ private final String[] patterns; - private static final Pattern PARSE_START = Pattern.compile("(.*?)\\{0}(.*?)\\{1}"); - private static final Pattern PARSE_MIDDLE = Pattern.compile("\\{0}(.*?)\\{1}"); - private static final Pattern PARSE_END = Pattern.compile("\\{0}(.*?)\\{1}(.*?)"); - private static final Pattern PARSE_TWO = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)"); - private static final Pattern PARSE_THREE = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)\\{2}(.*?)"); - private transient Pattern startPattern; + private transient String startBefore; + private transient String startBetween; private transient String middleBetween; - private transient Pattern endPattern; + private transient String endBetween; + private transient String endAfter; private ListFormat(Locale l, String[] patterns) { locale = l; @@ -149,50 +146,60 @@ public final class ListFormat extends Format { } } - // get pattern strings - var m = PARSE_START.matcher(patterns[START]); - String startBefore; - String startBetween; - if (m.matches()) { - startBefore = m.group(1); - startBetween = m.group(2); + // Get pattern strings. Pattern conditions from LDML are: + // - it contains the placeholders {0}, {1}, and {2} ("3"-pattern only) in order + // - "start" and "middle" patterns end with the {1} placeholder + // - "middle" and "end" patterns begin with the {0} placeholder + var pattern = patterns[START]; + var placeholderPositions = findPlaceholders(pattern); + if (placeholderPositions != null && + placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) { + startBefore = pattern.substring(0, placeholderPositions[0]); + startBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, + placeholderPositions[1]); } else { - throw new IllegalArgumentException("start pattern is incorrect: " + patterns[START]); + throw new IllegalArgumentException("start pattern is incorrect: " + pattern); } - m = PARSE_MIDDLE.matcher(patterns[MIDDLE]); - if (m.matches()) { - middleBetween = m.group(1); + + pattern = patterns[MIDDLE]; + placeholderPositions = findPlaceholders(pattern); + if (placeholderPositions != null && + placeholderPositions[0] == 0 && + placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) { + middleBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, + placeholderPositions[1]); } else { - throw new IllegalArgumentException("middle pattern is incorrect: " + patterns[MIDDLE]); + throw new IllegalArgumentException("middle pattern is incorrect: " + pattern); } - m = PARSE_END.matcher(patterns[END]); - String endBetween; - String endAfter; - if (m.matches()) { - endBetween = m.group(1); - endAfter = m.group(2); + + pattern = patterns[END]; + placeholderPositions = findPlaceholders(pattern); + if (placeholderPositions != null && placeholderPositions[0] == 0) { + endBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH, + placeholderPositions[1]); + endAfter = pattern.substring(placeholderPositions[1] + PLACEHOLDER_LENGTH); } else { - throw new IllegalArgumentException("end pattern is incorrect: " + patterns[END]); + throw new IllegalArgumentException("end pattern is incorrect: " + pattern); } // Validate two/three patterns, if given. Otherwise, generate them - if (!patterns[TWO].isEmpty()) { - if (!PARSE_TWO.matcher(patterns[TWO]).matches()) { - throw new IllegalArgumentException("pattern for two is incorrect: " + patterns[TWO]); + pattern = patterns[TWO]; + if (!pattern.isEmpty()) { + if (findPlaceholders(pattern) == null) { + throw new IllegalArgumentException("pattern for two is incorrect: " + pattern); } } else { patterns[TWO] = startBefore + "{0}" + endBetween + "{1}" + endAfter; } - if (!patterns[THREE].isEmpty()) { - if (!PARSE_THREE.matcher(patterns[THREE]).matches()) { - throw new IllegalArgumentException("pattern for three is incorrect: " + patterns[THREE]); + pattern = patterns[THREE]; + if (!pattern.isEmpty()) { + placeholderPositions = findPlaceholders(pattern); + if (placeholderPositions == null || placeholderPositions[2] == -1) { + throw new IllegalArgumentException("pattern for three is incorrect: " + pattern); } } else { patterns[THREE] = startBefore + "{0}" + startBetween + "{1}" + endBetween + "{2}" + endAfter; } - - startPattern = Pattern.compile(startBefore + "(.+?)" + startBetween); - endPattern = Pattern.compile(endBetween + "(.+?)" + endAfter); } /** @@ -455,21 +462,29 @@ public final class ListFormat extends Format { public Object parseObject(String source, ParsePosition parsePos) { Objects.requireNonNull(source); Objects.requireNonNull(parsePos); - var sm = startPattern.matcher(source); - var em = endPattern.matcher(source); + var startPattern = findPattern(source, parsePos.getIndex(), startBefore, startBetween); + var endPattern = findPattern(source, parsePos.getIndex(), endBetween, endAfter); Object parsed = null; - if (sm.find(parsePos.getIndex()) && em.find(parsePos.getIndex())) { - // get em to the last - var c = em.start(); - while (em.find()) { - c = em.start(); + if (startPattern != null && endPattern != null) { + // get endPattern to the last + var ep = endPattern; + while ((ep = findPattern(source, ep[1], endBetween, endAfter)) != null) { + endPattern = ep; } - em.find(c); - var startEnd = sm.end(); - var endStart = em.start(); + + var startEnd = startPattern[1]; + var endStart = endPattern[0]; if (startEnd <= endStart) { var mid = source.substring(startEnd, endStart); - var count = mid.split(middleBetween).length + 2; + var count = 3; + var mbLength = middleBetween.length(); + if (mbLength > 0) { + var midIndex = 0; + while ((midIndex = mid.indexOf(middleBetween, midIndex)) >= 0) { + count++; + midIndex += mbLength; + } + } parsed = new MessageFormat(createMessageFormatString(count), locale).parseObject(source, parsePos); } } @@ -565,7 +580,9 @@ public final class ListFormat extends Format { private String createMessageFormatString(int count) { var sb = new StringBuilder(256).append(patterns[START]); IntStream.range(2, count - 1).forEach(i -> sb.append(middleBetween).append("{").append(i).append("}")); - sb.append(patterns[END].replaceFirst("\\{0}", "").replaceFirst("\\{1}", "\\{" + (count - 1) + "\\}")); + sb.append(endBetween) + .append("{").append(count - 1).append("}") + .append(endAfter); return sb.toString(); } @@ -643,4 +660,51 @@ public final class ListFormat extends Format { */ NARROW } + + /** + * {@return the positions of the "{0}", "{1}", and "{2}" placeholders in the + * given pattern string, or null if the pattern is invalid} + * + * The returned array contains -1 for "{2}" if that placeholder is absent. + * + * @param pattern pattern string to parse + */ + private static int[] findPlaceholders(String pattern) { + var positions = new int[3]; + for (int i = 0; i < positions.length; i++) { + positions[i] = pattern.indexOf("{" + i + "}"); + } + + // Check the existence and order of the placeholders + if (positions[0] == -1 || + positions[1] == -1 || + positions[0] + PLACEHOLDER_LENGTH > positions[1] || + positions[2] != -1 && positions[1] + PLACEHOLDER_LENGTH > positions[2]) { + return null; + } + + return positions; + } + + /** + * {@return the start and end positions of the first pattern found in + * the given {@code source} starting at {@code pos}, or null if no such + * pattern exists} + * + * The pattern must contain at least one character between the + * {@code prefix} and {@code suffix} strings. The returned end position is + * exclusive. + * + * @param source string to search + * @param pos position at which to start the search + * @param prefix starting string within the pattern + * @param suffix ending string within the pattern + */ + private static int[] findPattern(String source, int pos, String prefix, String suffix) { + var prefixPos = source.indexOf(prefix, pos); + var suffixPos = prefixPos != -1 ? source.indexOf(suffix, prefixPos + prefix.length() + 1) : -1; + + return prefixPos < suffixPos ? + new int[] {prefixPos, suffixPos + suffix.length()} : null; + } } diff --git a/test/jdk/java/text/Format/ListFormat/TestListFormat.java b/test/jdk/java/text/Format/ListFormat/TestListFormat.java index bcccc2873f5..1500a1b0818 100644 --- a/test/jdk/java/text/Format/ListFormat/TestListFormat.java +++ b/test/jdk/java/text/Format/ListFormat/TestListFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,7 +23,7 @@ /* * @test - * @bug 8041488 8316974 8318569 8306116 + * @bug 8041488 8316974 8318569 8306116 8385736 * @summary Tests for ListFormat class * @run junit TestListFormat */ @@ -66,6 +66,14 @@ public class TestListFormat { "", "", }; + // Ensures regex metacharacters in custom patterns are treated as literals. + private static final String[] CUSTOM_PATTERNS_METACHAR = { + ". {0} * {1}", + "{0} + {1}", + "{0} | {1} [", + "", + "", + }; private static final String[] CUSTOM_PATTERNS_IAE_START = { "{0}", "{0} mid {1}", @@ -120,7 +128,7 @@ public class TestListFormat { assertEquals(ListFormat.getInstance(), ListFormat.getInstance(Locale.getDefault(Locale.Category.FORMAT), ListFormat.Type.STANDARD, ListFormat.Style.FULL)); } - static Arguments[] getInstance_1Arg() { + private static Arguments[] getInstance_1Arg() { return new Arguments[] { arguments(CUSTOM_PATTERNS_FULL, SAMPLE1, "foo"), arguments(CUSTOM_PATTERNS_FULL, SAMPLE2, "twobef foo two bar twoaft"), @@ -130,10 +138,14 @@ public class TestListFormat { arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2, "sbef foo ebet bar eaft"), arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3, "sbef foo sbet bar ebet baz eaft"), arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"), + arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE1, "foo"), + arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE2, ". foo | bar ["), + arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE3, ". foo * bar | baz ["), + arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE4, ". foo * bar + baz | qux ["), }; } - static Arguments[] getInstance_1Arg_IAE() { + private static Arguments[] getInstance_1Arg_IAE() { return new Arguments[] { arguments(new String[1], "Pattern array length should be 5"), arguments(new String[6], "Pattern array length should be 5"), @@ -146,7 +158,7 @@ public class TestListFormat { }; } - static Arguments[] getInstance_3Arg() { + private static Arguments[] getInstance_3Arg() { return new Arguments[] { arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL, "foo, bar, and baz", true), @@ -188,7 +200,7 @@ public class TestListFormat { }; } - static Arguments[] parseObject_parsePos() { + private static Arguments[] parseObject_parsePos() { return new Arguments[] { arguments(CUSTOM_PATTERNS_FULL, SAMPLE1), arguments(CUSTOM_PATTERNS_FULL, SAMPLE2), @@ -201,7 +213,7 @@ public class TestListFormat { }; } - static Arguments[] getInstance_3Arg_InheritPatterns() { + private static Arguments[] getInstance_3Arg_InheritPatterns() { return new Arguments[] { arguments(ListFormat.Type.STANDARD, ListFormat.Style.FULL), arguments(ListFormat.Type.STANDARD, ListFormat.Style.SHORT), @@ -215,7 +227,7 @@ public class TestListFormat { }; } - static Arguments[] getLocale_localeDependent() { + private static Arguments[] getLocale_localeDependent() { return new Arguments[] { arguments(Locale.ROOT), arguments(Locale.US), @@ -225,6 +237,16 @@ public class TestListFormat { }; } + private static Arguments[] getInstance_1Arg_InvalidLongPattern() { + return new Arguments[] { + arguments(0, "start pattern is incorrect:"), + arguments(1, "middle pattern is incorrect:"), + arguments(2, "end pattern is incorrect:"), + arguments(3, "pattern for two is incorrect:"), + arguments(4, "pattern for three is incorrect:"), + }; + } + @ParameterizedTest @MethodSource void getInstance_1Arg(String[] patterns, List input, String expected) throws ParseException { @@ -240,6 +262,25 @@ public class TestListFormat { assertEquals(errorMsg, ex.getMessage()); } + @ParameterizedTest + @MethodSource + void getInstance_1Arg_InvalidLongPattern(int index, String expected) { + var patterns = new String[]{ + "{0}, {1}", + "{0}, {1}", + "{0}, and {1}", + "{0} and {1}", + "{0} {1} {2}" + }; + patterns[index] = "{0}".repeat(100_000); + + // Ensures validation of invalid long patterns completes without timing out + var msg = assertThrows(IllegalArgumentException.class, + () -> ListFormat.getInstance(patterns)) + .getMessage(); + assertEquals(expected, msg.substring(0, Math.min(msg.length(), expected.length()))); + } + @ParameterizedTest @MethodSource void getInstance_3Arg(Locale l, ListFormat.Type type, ListFormat.Style style, String expected, boolean roundTrip) throws ParseException { @@ -348,6 +389,7 @@ public class TestListFormat { // should be inherited from parent locales. Locale.availableLocales().forEach(l -> ListFormat.getInstance(l, type, style)); } + @Test void getInstance_3Arg_InheritanceValidation() { // Tests if inheritance works as expected.