mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-27 05:42:24 +00:00
8027645: Pattern.split() with positive lookahead
6559590: Pattern.compile(".*").split("") returns incorrect result
Updated spec/impl for these two corner cases
Reviewed-by: alanb, psandoz
This commit is contained in:
parent
8280dc2170
commit
4208726b35
@ -2235,7 +2235,13 @@ public final class String
|
||||
* expression or is terminated by the end of the string. The substrings in
|
||||
* the array are in the order in which they occur in this string. If the
|
||||
* expression does not match any part of the input then the resulting array
|
||||
* has just one element, namely this string.
|
||||
* has just one element, namely this string. A zero-length input sequence
|
||||
* always results in a zero-length resulting array.
|
||||
*
|
||||
* <p> When there is a positive-width match at the beginning of this
|
||||
* string then an empty leading substring is included at the beginning
|
||||
* of the resulting array. A zero-width match at the beginning however
|
||||
* never produces such an empty leading substring.
|
||||
*
|
||||
* <p> The {@code limit} parameter controls the number of times the
|
||||
* pattern is applied and therefore affects the length of the resulting
|
||||
@ -2325,6 +2331,8 @@ public final class String
|
||||
(ch < Character.MIN_HIGH_SURROGATE ||
|
||||
ch > Character.MAX_LOW_SURROGATE))
|
||||
{
|
||||
if (value.length == 0)
|
||||
return new String[0];
|
||||
int off = 0;
|
||||
int next = 0;
|
||||
boolean limited = limit > 0;
|
||||
|
||||
@ -1142,9 +1142,15 @@ public final class Pattern
|
||||
* input sequence that is terminated by another subsequence that matches
|
||||
* this pattern or is terminated by the end of the input sequence. The
|
||||
* substrings in the array are in the order in which they occur in the
|
||||
* input. If this pattern does not match any subsequence of the input then
|
||||
* input. If this pattern does not match any subsequence of the input then
|
||||
* the resulting array has just one element, namely the input sequence in
|
||||
* string form.
|
||||
* string form. A zero-length input sequence always results in a zero-length
|
||||
* resulting array.
|
||||
*
|
||||
* <p> When there is a positive-width match at the beginning of the input
|
||||
* sequence then an empty leading substring is included at the beginning
|
||||
* of the resulting array. A zero-width match at the beginning however
|
||||
* never produces such an empty leading substring.
|
||||
*
|
||||
* <p> The <tt>limit</tt> parameter controls the number of times the
|
||||
* pattern is applied and therefore affects the length of the resulting
|
||||
@ -1185,7 +1191,6 @@ public final class Pattern
|
||||
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
|
||||
* </table></blockquote>
|
||||
*
|
||||
*
|
||||
* @param input
|
||||
* The character sequence to be split
|
||||
*
|
||||
@ -1196,6 +1201,8 @@ public final class Pattern
|
||||
* around matches of this pattern
|
||||
*/
|
||||
public String[] split(CharSequence input, int limit) {
|
||||
if (input.length() == 0)
|
||||
return new String[0];
|
||||
int index = 0;
|
||||
boolean matchLimited = limit > 0;
|
||||
ArrayList<String> matchList = new ArrayList<>();
|
||||
@ -1204,6 +1211,11 @@ public final class Pattern
|
||||
// Add segments before each match found
|
||||
while(m.find()) {
|
||||
if (!matchLimited || matchList.size() < limit - 1) {
|
||||
if (index == 0 && index == m.start() && m.start() == m.end()) {
|
||||
// no empty leading substring included for zero-width match
|
||||
// at the beginning of the input char sequence.
|
||||
continue;
|
||||
}
|
||||
String match = input.subSequence(index, m.start()).toString();
|
||||
matchList.add(match);
|
||||
index = m.end();
|
||||
@ -5762,6 +5774,13 @@ NEXT: while (i <= last) {
|
||||
* the resulting stream has just one element, namely the input sequence in
|
||||
* string form.
|
||||
*
|
||||
* <p> A zero-length input sequence always results in an empty stream.
|
||||
*
|
||||
* <p> When there is a positive-width match at the beginning of the input
|
||||
* sequence then an empty leading substring is included at the beginning
|
||||
* of the stream. A zero-width match at the beginning however never produces
|
||||
* such an empty leading substring.
|
||||
*
|
||||
* <p> If the input sequence is mutable, it must remain constant during the
|
||||
* execution of the terminal stream operation. Otherwise, the result of the
|
||||
* terminal stream operation is undefined.
|
||||
@ -5817,7 +5836,8 @@ NEXT: while (i <= last) {
|
||||
current = matcher.end();
|
||||
if (!nextElement.isEmpty()) {
|
||||
return true;
|
||||
} else {
|
||||
} else if (current > 0) { // no empty leading substring for zero-width
|
||||
// match at the beginning of the input
|
||||
emptyElementCount++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 6840246
|
||||
* @bug 6840246 6559590
|
||||
* @summary test String.split()
|
||||
*/
|
||||
import java.util.Arrays;
|
||||
@ -78,12 +78,11 @@ public class Split {
|
||||
throw new RuntimeException("String.split failure 7");
|
||||
}
|
||||
// Check the case for limit == 0, source = "";
|
||||
// split() now returns 0-length for empty source "" see #6559590
|
||||
source = "";
|
||||
String[] result = source.split("e", 0);
|
||||
if (result.length != 1)
|
||||
if (result.length != 0)
|
||||
throw new RuntimeException("String.split failure 8");
|
||||
if (!result[0].equals(source))
|
||||
throw new RuntimeException("String.split failure 9");
|
||||
|
||||
// check fastpath of String.split()
|
||||
source = "0123456789abcdefgABCDEFG";
|
||||
|
||||
@ -33,7 +33,8 @@
|
||||
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
|
||||
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
|
||||
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
|
||||
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647
|
||||
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
|
||||
* 8027645
|
||||
*/
|
||||
|
||||
import java.util.regex.*;
|
||||
@ -148,6 +149,7 @@ public class RegExTest {
|
||||
groupCurlyNotFoundSuppTest();
|
||||
groupCurlyBackoffTest();
|
||||
patternAsPredicate();
|
||||
|
||||
if (failure) {
|
||||
throw new
|
||||
RuntimeException("RegExTest failed, 1st failure: " +
|
||||
@ -1776,13 +1778,68 @@ public class RegExTest {
|
||||
failCount++;
|
||||
}
|
||||
// Check the case for limit == 0, source = "";
|
||||
// split() now returns 0-length for empty source "" see #6559590
|
||||
source = "";
|
||||
result = source.split("e", 0);
|
||||
if (result.length != 1)
|
||||
failCount++;
|
||||
if (!result[0].equals(source))
|
||||
if (result.length != 0)
|
||||
failCount++;
|
||||
|
||||
// Check both split() and splitAsStream(), especially for zero-length
|
||||
// input and zero-length match cases
|
||||
String[][] input = new String[][] {
|
||||
{ " ", "Abc Efg Hij" }, // normal non-zero-match
|
||||
{ " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
|
||||
{ " ", "Abc Efg Hij" }, // non-zero-match in the middle
|
||||
{ "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
|
||||
{ "(?=\\p{Lu})", "AbcEfg" },
|
||||
{ "(?=\\p{Lu})", "Abc" },
|
||||
{ " ", "" }, // zero-length input
|
||||
{ ".*", "" },
|
||||
|
||||
// some tests from PatternStreamTest.java
|
||||
{ "4", "awgqwefg1fefw4vssv1vvv1" },
|
||||
{ "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
|
||||
{ "1", "awgqwefg1fefw4vssv1vvv1" },
|
||||
{ "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
|
||||
{ "\u56da", "1\u56da23\u56da456\u56da7890" },
|
||||
{ "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
|
||||
{ "\u56da", "" },
|
||||
{ "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
|
||||
{ "o", "boo:and:foo" },
|
||||
{ "o", "booooo:and:fooooo" },
|
||||
{ "o", "fooooo:" },
|
||||
};
|
||||
|
||||
String[][] expected = new String[][] {
|
||||
{ "Abc", "Efg", "Hij" },
|
||||
{ "", "Abc", "Efg", "Hij" },
|
||||
{ "Abc", "", "Efg", "Hij" },
|
||||
{ "Abc", "Efg", "Hij" },
|
||||
{ "Abc", "Efg" },
|
||||
{ "Abc" },
|
||||
{},
|
||||
{},
|
||||
|
||||
{ "awgqwefg1fefw", "vssv1vvv1" },
|
||||
{ "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
|
||||
{ "awgqwefg", "fefw4vssv", "vvv" },
|
||||
{ "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
|
||||
{ "1", "23", "456", "7890" },
|
||||
{ "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
|
||||
{},
|
||||
{ "This", "is", "testing", "", "with", "different", "separators" },
|
||||
{ "b", "", ":and:f" },
|
||||
{ "b", "", "", "", "", ":and:f" },
|
||||
{ "f", "", "", "", "", ":" },
|
||||
};
|
||||
for (int i = 0; i < input.length; i++) {
|
||||
pattern = Pattern.compile(input[i][0]);
|
||||
if (!Arrays.equals(pattern.split(input[i][1]), expected[i]))
|
||||
failCount++;
|
||||
if (!Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
|
||||
expected[i]))
|
||||
failCount++;
|
||||
}
|
||||
report("Split");
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user