+ * Group name
+ * A capturing group can also be assigned a "name", a named-capturing group,
+ * and then be back-referenced later by the "name". Group names are composed of
+ * the following characters:
+ *
+ *
+ * - The uppercase letters 'A' through 'Z'
+ * ('\u0041' through '\u005a'),
+ *
- The lowercase letters 'a' through 'z'
+ * ('\u0061' through '\u007a'),
+ *
- The digits '0' through '9'
+ * ('\u0030' through '\u0039'),
+ *
+ *
+ * A named-capturing group is still numbered as described in
+ * Group number.
+ *
*
The captured input associated with a group is always the subsequence
* that the group most recently matched. If a group is evaluated a second time
* because of quantification then its previously-captured value, if any, will
@@ -479,9 +506,9 @@ import java.util.Arrays;
* group two set to "b". All captured input is discarded at the
* beginning of each match.
*
- *
Groups beginning with (? are pure, non-capturing groups
- * that do not capture text and do not count towards the group total.
- *
+ *
Groups beginning with (? are either pure, non-capturing groups
+ * that do not capture text and do not count towards the group total, or
+ * named-capturing groups.
*
*
Unicode support
*
@@ -794,6 +821,12 @@ public final class Pattern
*/
transient int[] buffer;
+ /**
+ * Maps the "name" of each named-capturing group to its group
+ * number.
+ */
+ transient volatile Map namedGroups;
+
/**
* Temporary storage used while parsing group references.
*/
@@ -1467,6 +1500,7 @@ loop: for(int x=0, offset=0; x namedGroups() {
+ if (namedGroups == null)
+ namedGroups = new HashMap(2);
+ return namedGroups;
+ }
+
/**
* Used to print out a subtree of the Pattern to help with debugging.
*/
@@ -2156,7 +2196,22 @@ loop: for(int x=0, offset=0; x does not exit");
+ if (create) {
+ if (has(CASE_INSENSITIVE))
+ root = new CIBackRef(namedGroups().get(name), has(UNICODE_CASE));
+ else
+ root = new BackRef(namedGroups().get(name));
+ }
+ return -1;
case 'l':
case 'm':
break;
@@ -2455,6 +2510,24 @@ loop: for(int x=0, offset=0; x" is consumed after parsing.
+ */
+    private String groupname(int ch) {
+        // 'ch' is the first character of the name; it is appended
+        // unconditionally before any further input is read.
+        StringBuilder name = new StringBuilder();
+        name.append(Character.toChars(ch));
+        for (;;) {
+            ch = read();
+            boolean nameChar = ASCII.isLower(ch) || ASCII.isUpper(ch)
+                               || ASCII.isDigit(ch);
+            if (!nameChar) {
+                break;
+            }
+            name.append(Character.toChars(ch));
+        }
+        if (name.length() == 0) {
+            throw error("named capturing group has 0 length name");
+        }
+        // The character that ended the name must be the closing '>'.
+        if (ch != '>') {
+            throw error("named capturing group is missing trailing '>'");
+        }
+        return name.toString();
+    }
+
/**
* Parses a group and returns the head node of a set of nodes that process
* the group. Sometimes a double return system is used where the tail is
@@ -2494,6 +2567,18 @@ loop: for(int x=0, offset=0; x is already defined");
+ capturingGroup = true;
+ head = createGroup(false);
+ tail = root;
+ namedGroups().put(name, capturingGroupCount-1);
+ head.next = expr(tail);
+ break;
+ }
int start = cursor;
head = createGroup(true);
tail = root;
diff --git a/jdk/test/java/util/regex/BMPTestCases.txt b/jdk/test/java/util/regex/BMPTestCases.txt
new file mode 100644
index 00000000000..c50f49628e3
--- /dev/null
+++ b/jdk/test/java/util/regex/BMPTestCases.txt
@@ -0,0 +1,951 @@
+//
+// Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+// This file contains test cases with BMP characters for regular expressions.
+// A test case consists of three lines:
+// The first line is a pattern used in the test
+// The second line is the input to search for the pattern in
+// The third line is a concatenation of the match, the number of groups,
+// and the contents of the first four subexpressions.
+// Empty lines and lines beginning with comment slashes are ignored.
+
+// Test unsetting of backed off groups
+^(\u3042)?\u3042
+\u3042
+true \u3042 1
+
+^(\u3042\u3042(\u3043\u3043)?)+$
+\u3042\u3042\u3043\u3043\u3042\u3042
+true \u3042\u3042\u3043\u3043\u3042\u3042 2 \u3042\u3042 \u3043\u3043
+
+((\u3042|\u3043)?\u3043)+
+\u3043
+true \u3043 2 \u3043
+
+(\u3042\u3042\u3042)?\u3042\u3042\u3042
+\u3042\u3042\u3042
+true \u3042\u3042\u3042 1
+
+^(\u3042(\u3043)?)+$
+\u3042\u3043\u3042
+true \u3042\u3043\u3042 2 \u3042 \u3043
+
+^(\u3042(\u3043(\u3044)?)?)?\u3042\u3043\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 3
+
+^(\u3042(\u3043(\u3044))).*
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 3 \u3042\u3043\u3044 \u3043\u3044 \u3044
+
+// use of x modifier
+\u3042\u3043\u3044(?x)\u3043la\u3049
+\u3042\u3043\u3044\u3043la\u3049
+true \u3042\u3043\u3044\u3043la\u3049 0
+
+\u3042\u3043\u3044(?x) bla\u3049
+\u3042\u3043\u3044bla\u3049
+true \u3042\u3043\u3044bla\u3049 0
+
+\u3042\u3043\u3044(?x) bla\u3049 ble\u3044\u3049
+\u3042\u3043\u3044bla\u3049ble\u3044\u3049
+true \u3042\u3043\u3044bla\u3049ble\u3044\u3049 0
+
+\u3042\u3043\u3044(?x) bla\u3049 # ignore comment
+\u3042\u3043\u3044bla\u3049
+true \u3042\u3043\u3044bla\u3049 0
+
+// Simple alternation
+\u3042|\u3043
+\u3042
+true \u3042 0
+
+\u3042|\u3043
+\u305B
+false 0
+
+\u3042|\u3043
+\u3043
+true \u3043 0
+
+\u3042|\u3043|\u3044\u3045
+\u3044\u3045
+true \u3044\u3045 0
+
+\u3042|\u3042\u3045
+\u3042\u3045
+true \u3042 0
+
+\u305B(\u3042|\u3042\u3044)\u3043
+\u305B\u3042\u3044\u3043
+true \u305B\u3042\u3044\u3043 1 \u3042\u3044
+
+// Simple char class
+[\u3042\u3043\u3044]+
+\u3042\u3043\u3042\u3043\u3042\u3043
+true \u3042\u3043\u3042\u3043\u3042\u3043 0
+
+[\u3042\u3043\u3044]+
+\u3045\u3046\u3047\u3048
+false 0
+
+[\u3042\u3043\u3044]+[\u3045\u3046\u3047]+[\u3048\u3049\u304A]+
+\u305B\u305B\u305B\u3042\u3042\u3045\u3045\u3048\u3048\u305B\u305B\u305B
+true \u3042\u3042\u3045\u3045\u3048\u3048 0
+
+// Range char class
+[\u3042-\u3048]+
+\u305B\u305B\u305B\u3048\u3048\u3048
+true \u3048\u3048\u3048 0
+
+[\u3042-\u3048]+
+mmm
+false 0
+
+[\u3042-]+
+\u305B\u3042-9\u305B
+true \u3042- 0
+
+[\u3042-\\u4444]+
+\u305B\u3042-9\u305B
+true \u305B\u3042 0
+
+// Negated char class
+[^\u3042\u3043\u3044]+
+\u3042\u3043\u3042\u3043\u3042\u3043
+false 0
+
+[^\u3042\u3043\u3044]+
+\u3042\u3042\u3042\u3043\u3043\u3043\u3044\u3044\u3044\u3045\u3046\u3047\u3048
+true \u3045\u3046\u3047\u3048 0
+
+// Making sure a ^ not in first position matches literal ^
+[\u3042\u3043\u3044^\u3043]
+\u3043
+true \u3043 0
+
+[\u3042\u3043\u3044^\u3043]
+^
+true ^ 0
+
+// Class union and intersection
+[\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3043
+true \u3043 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3046
+true \u3046 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3042
+true \u3042 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3050
+true \u3050 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+4
+true 4 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3046
+false 0
+
+[\u3042-\u3045[0-9][\u304e-\u3051]]
+\u3056
+false 0
+
+[[\u3042-\u3045][0-9][\u304e-\u3051]]
+\u3043
+true \u3043 0
+
+[[\u3042-\u3045][0-9][\u304e-\u3051]]
+\u305B
+false 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3042
+true \u3042 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3046
+true \u3046 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+\u3049
+true \u3049 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]]
+m
+false 0
+
+[\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]m]
+m
+true m 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3042
+true \u3042 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3045
+true \u3045 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+\u3049
+true \u3049 0
+
+[\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A]
+w
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u3046
+false 0
+
+[\u3042-\u3044&&[\u3045-\u3047]]
+\u305B
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u3046
+false 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047]]
+\u305B
+false 0
+
+[\u3042-\u3044&&\u3045-\u3047]
+\u3042
+false 0
+
+[\u3042-\u304e&&\u304e-\u305B]
+\u304e
+true \u304e 0
+
+[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u3044]
+\u304e
+false 0
+
+[\u3042-\u304e&&\u304e-\u305B&&\u3042-\u305B]
+\u304e
+true \u304e 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u3042
+false 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u304e
+true \u304e 0
+
+[[\u3042-\u304e]&&[\u304e-\u305B]]
+\u305B
+false 0
+
+[[\u3042-\u304e]&&[^\u3042-\u3044]]
+\u3042
+false 0
+
+[[\u3042-\u304e]&&[^\u3042-\u3044]]
+\u3045
+true \u3045 0
+
+[\u3042-\u304e&&[^\u3042-\u3044]]
+\u3042
+false 0
+
+[\u3042-\u304e&&[^\u3042-\u3044]]
+\u3045
+true \u3045 0
+
+[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
+\u3042
+false 0
+
+[\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]]
+\u3046
+true \u3046 0
+
+[[\u3042-\u3044]&&\u3045-\u3047\u3042-\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044]&&[\u3045-\u3047][\u3042-\u3044]]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044[\u3045\u3046\u3047]]
+\u3046
+true \u3046 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
+\u3042
+false 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]]
+\u3044
+true \u3044 0
+
+[[\u3042-\u3044]&&[\u3043-\u3045][\u3044-\u3046]&&[\u3056-\u305B]]
+\u3044
+false 0
+
+[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
+\u3042
+true \u3042 0
+
+[\u3042\u3043\u3044[^\u3043\u3044\u3045]]
+\u3045
+false 0
+
+[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
+\u3043
+true \u3043 0
+
+[\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A]
+\u3048
+false 0
+
+[[\u3042[\u3043]]&&[\u3043[\u3042]]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[b][c][\u3042]&&[^d]]
+\u3042
+true \u3042 0
+
+[[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]]
+\u3045
+false 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]]
+\u3042
+false 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&\u3044]
+\u3044
+true \u3044 0
+
+[[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&[\u3044\u3045\u3046]]
+\u3044
+true \u3044 0
+
+[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]]
+\u3044
+true \u3044 0
+
+[\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]&&[\u3056-\u305B]]
+\u305B
+true \u305B 0
+
+[\u3059[\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
+\u305B
+false 0
+
+[\u3059[[w\u305B]\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]]
+\u305B
+true \u305B 0
+
+[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3042\u3043\u3044]
+\u3042
+true \u3042 0
+
+[[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3059\u305A\u305B[\u3042\u3043\u3044]]
+\u3042
+true \u3042 0
+
+\pL
+\u3042
+true \u3042 0
+
+\pL
+7
+false 0
+
+\p{L}
+\u3042
+true \u3042 0
+
+\p{IsL}
+\u3042
+true \u3042 0
+
+\p{InHiragana}
+\u3042
+true \u3042 0
+
+\p{InHiragana}
+\u0370
+false 0
+
+\pL\u3043\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042\p{InGreek}
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042\P{InGreek}
+\u3042\u0370
+false 0
+
+\u3042\P{InGreek}
+\u3042\u3043
+true \u3042\u3043 0
+
+\u3042{^InGreek}
+-
+error
+
+\u3042\p{^InGreek}
+-
+error
+
+\u3042\P{^InGreek}
+-
+error
+
+\u3042\p{InGreek}
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042[\p{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042[\P{InGreek}]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042[\P{InGreek}]\u3044
+\u3042\u3043\u3044
+true \u3042\u3043\u3044 0
+
+\u3042[{^InGreek}]\u3044
+\u3042n\u3044
+true \u3042n\u3044 0
+
+\u3042[{^InGreek}]\u3044
+\u3042\u305B\u3044
+false 0
+
+\u3042[\p{^InGreek}]\u3044
+-
+error
+
+\u3042[\P{^InGreek}]\u3044
+-
+error
+
+\u3042[\p{InGreek}]
+\u3042\u0370
+true \u3042\u0370 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[\p{InGreek}r]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[r\p{InGreek}]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[^\p{InGreek}]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042[^\P{InGreek}]\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042[\p{InGreek}&&[^\u0370]]\u3044
+\u3042\u0370\u3044
+false 0
+
+// Test the dot metacharacter
+\u3042.\u3044.+
+\u3042#\u3044%&
+true \u3042#\u3044%& 0
+
+\u3042\u3043.
+\u3042\u3043\n
+false 0
+
+(?s)\u3042\u3043.
+\u3042\u3043\n
+true \u3042\u3043\n 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042\u6000\u3044
+true \u3042\u6000\u3044 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042r\u3044
+true \u3042r\u3044 0
+
+\u3042[\p{L}&&[\P{InGreek}]]\u3044
+\u3042\u0370\u3044
+false 0
+
+\u3042\p{InGreek}\u3044
+\u3042\u0370\u3044
+true \u3042\u0370\u3044 0
+
+\u3042\p{Sc}
+\u3042$
+true \u3042$ 0
+
+\W\w\W
+rrrr#\u3048\u3048\u3048
+false 0
+
+\u3042\u3043\u3044[\s\u3045\u3046\u3047]*
+\u3042\u3043\u3044 \u3045\u3046\u3047
+true \u3042\u3043\u3044 \u3045\u3046\u3047 0
+
+\u3042\u3043\u3044[\s\u305A-\u305B]*
+\u3042\u3043\u3044 \u305A \u305B
+true \u3042\u3043\u3044 \u305A \u305B 0
+
+\u3042\u3043\u3044[\u3042-\u3045\s\u304e-\u3051]*
+\u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051
+true \u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051 0
+
+// Test the whitespace escape sequence
+\u3042\u3043\s\u3044
+\u3042\u3043 \u3044
+true \u3042\u3043 \u3044 0
+
+\s\s\s
+\u3043l\u3042\u3049 \u3046rr
+false 0
+
+\S\S\s
+\u3043l\u3042\u3049 \u3046rr
+true \u3042\u3049 0
+
+// Test the digit escape sequence
+\u3042\u3043\d\u3044
+\u3042\u30439\u3044
+true \u3042\u30439\u3044 0
+
+\d\d\d
+\u3043l\u3042\u304945
+false 0
+
+// Test the caret metacharacter
+^\u3042\u3043\u3044
+\u3042\u3043\u3044\u3045\u3046\u3047
+true \u3042\u3043\u3044 0
+
+^\u3042\u3043\u3044
+\u3043\u3044\u3045\u3042\u3043\u3044
+false 0
+
+// Greedy ? metacharacter
+\u3042?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042?\u3043
+\u3043
+true \u3043 0
+
+\u3042?\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Reluctant ? metacharacter
+\u3042??\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042??\u3043
+\u3043
+true \u3043 0
+
+\u3042??\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.??\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Possessive ? metacharacter
+\u3042?+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+\u3042?+\u3043
+\u3043
+true \u3043 0
+
+\u3042?+\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.?+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3043 0
+
+// Greedy + metacharacter
+\u3042+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042+\u3043
+\u3043
+false 0
+
+\u3042+\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.+\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Reluctant + metacharacter
+\u3042+?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042+?\u3043
+\u3043
+false 0
+
+\u3042+?\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.+?\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+// Possessive + metacharacter
+\u3042++\u3043
+\u3042\u3042\u3042\u3042\u3043
+true \u3042\u3042\u3042\u3042\u3043 0
+
+\u3042++\u3043
+\u3043
+false 0
+
+\u3042++\u3043
+\u3042\u3042\u3042\u3044\u3044\u3044
+false 0
+
+.++\u3043
+\u3042\u3042\u3042\u3042\u3043
+false 0
+
+// Greedy Repetition
+\u3042{2,3}
+\u3042
+false 0
+
+\u3042{2,3}
+\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}
+\u3042\u3042\u3042
+true \u3042\u3042\u3042 0
+
+\u3042{2,3}
+\u3042\u3042\u3042\u3042
+true \u3042\u3042\u3042 0
+
+\u3042{3,}
+\u305B\u305B\u305B\u3042\u3042\u3042\u3042\u305B\u305B\u305B
+true \u3042\u3042\u3042\u3042 0
+
+\u3042{3,}
+\u305B\u305B\u305B\u3042\u3042\u305B\u305B\u305B
+false 0
+
+// Reluctant Repetition
+\u3042{2,3}?
+\u3042
+false 0
+
+\u3042{2,3}?
+\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}?
+\u3042\u3042\u3042
+true \u3042\u3042 0
+
+\u3042{2,3}?
+\u3042\u3042\u3042\u3042
+true \u3042\u3042 0
+
+// Zero width Positive lookahead
+\u3042\u3043\u3044(?=\u3045)
+\u305B\u305B\u305B\u3042\u3043\u3044\u3045
+true \u3042\u3043\u3044 0
+
+\u3042\u3043\u3044(?=\u3045)
+\u305B\u305B\u305B\u3042\u3043\u3044\u3046\u3045
+false 0
+
+// Zero width Negative lookahead
+\u3042\u3043\u3044(?!\u3045)
+\u305B\u305B\u3042\u3043\u3044\u3045
+false 0
+
+\u3042\u3043\u3044(?!\u3045)
+\u305B\u305B\u3042\u3043\u3044\u3046\u3045
+true \u3042\u3043\u3044 0
+
+// Zero width Positive lookbehind
+\u3042(?<=\u3042)
+###\u3042\u3043\u3044
+true \u3042 0
+
+\u3042(?<=\u3042)
+###\u3043\u3044###
+false 0
+
+// Zero width Negative lookbehind
+(?3
+// So that the BM optimization is part of test
+\Q***\E\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\u3043l\Q***\E\u3042\u3043\u3044
+\u3043l***\u3042\u3043\u3044
+true \u3043l***\u3042\u3043\u3044 0
+
+\Q***\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\u3043l\u3042\u3049\Q***\E\u3042\u3043\u3044
+\u3043l\u3042\u3049***\u3042\u3043\u3044
+true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
+
+\Q***\u3042\u3043\u3044
+***\u3042\u3043\u3044
+true ***\u3042\u3043\u3044 0
+
+\Q*\u3042\u3043
+*\u3042\u3043
+true *\u3042\u3043 0
+
+\u3043l\u3042\u3049\Q***\u3042\u3043\u3044
+\u3043l\u3042\u3049***\u3042\u3043\u3044
+true \u3043l\u3042\u3049***\u3042\u3043\u3044 0
+
+\u3043l\u3042\Q***\u3042\u3043\u3044
+\u3043l\u3042***\u3042\u3043\u3044
+true \u3043l\u3042***\u3042\u3043\u3044 0
+
+[\043]+
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true # 0
+
+[\042-\044]+
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true # 0
+
+[\u1234-\u1236]
+\u3043l\u3042\u3049\u3043l\u3042\u3049\u1235\u3043le\u3044\u3049
+true \u1235 0
+
+[^\043]*
+\u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049
+true \u3043l\u3042\u3049\u3043l\u3042\u3049 0
diff --git a/jdk/test/java/util/regex/RegExTest.java b/jdk/test/java/util/regex/RegExTest.java
new file mode 100644
index 00000000000..29d8b55c437
--- /dev/null
+++ b/jdk/test/java/util/regex/RegExTest.java
@@ -0,0 +1,3511 @@
+/*
+ * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @summary tests RegExp framework
+ * @author Mike McCloskey
+ * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
+ * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
+ * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
+ * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
+ * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
+ * 6350801 6676425
+ */
+
+import java.util.regex.*;
+import java.util.Random;
+import java.io.*;
+import java.util.*;
+import java.nio.CharBuffer;
+
+/**
+ * This is a test class created to check the operation of
+ * the Pattern and Matcher classes.
+ */
+public class RegExTest {
+
+ private static Random generator = new Random();
+ private static boolean failure = false;
+ private static int failCount = 0;
+
+ /**
+ * Runs every regular-expression test in this class: first the cases
+ * read from the test-case files, then the programmatic tests, in the
+ * fixed order listed below.
+ *
+ * @param args command-line arguments; not read by this method
+ * @throws RuntimeException if the {@code failure} flag is set once all
+ *         tests have run
+ */
+ public static void main(String[] args) throws Exception {
+ // Most of the tests are in a file
+ processFile("TestCases.txt");
+ //processFile("PerlCases.txt");
+ processFile("BMPTestCases.txt");
+ processFile("SupplementaryTestCases.txt");
+
+ // These test many randomly generated char patterns
+ bm();
+ slice();
+
+ // These are hard to put into the file
+ escapes();
+ blankInput();
+
+ // Substitution tests on randomly generated sequences
+ globalSubstitute();
+ stringbufferSubstitute();
+ substitutionBasher();
+
+ // Canonical Equivalence
+ ceTest();
+
+ // Anchors
+ anchorTest();
+
+ // boolean match calls
+ matchesTest();
+ lookingAtTest();
+
+ // Pattern API
+ patternMatchesTest();
+
+ // Misc
+ lookbehindTest();
+ nullArgumentTest();
+ backRefTest();
+ groupCaptureTest();
+ caretTest();
+ charClassTest();
+ emptyPatternTest();
+ findIntTest();
+ group0Test();
+ longPatternTest();
+ octalTest();
+ ampersandTest();
+ negationTest();
+ splitTest();
+ appendTest();
+ caseFoldingTest();
+ commentsTest();
+ unixLinesTest();
+ replaceFirstTest();
+ gTest();
+ zTest();
+ serializeTest();
+ reluctantRepetitionTest();
+ multilineDollarTest();
+ dollarAtEndTest();
+ caretBetweenTerminatorsTest();
+ // This RFE rejected in Tiger numOccurrencesTest();
+ javaCharClassTest();
+ nonCaptureRepetitionTest();
+ notCapturedGroupCurlyMatchTest();
+ escapedSegmentTest();
+ literalPatternTest();
+ literalReplacementTest();
+ regionTest();
+ toStringTest();
+ negatedCharClassTest();
+ findFromTest();
+ boundsTest();
+ unicodeWordBoundsTest();
+ caretAtEndTest();
+ wordSearchTest();
+ hitEndTest();
+ toMatchResultTest();
+ surrogatesInClassTest();
+ namedGroupCaptureTest();
+
+ // Fail the whole run if the failure flag was set by any test;
+ // otherwise report overall success on stderr.
+ if (failure)
+ throw new RuntimeException("Failure in the RE handling.");
+ else
+ System.err.println("OKAY: All tests passed.");
+ }
+
+ // Utility functions
+
+ private static String getRandomAlphaString(int length) {
+ StringBuffer buf = new StringBuffer(length);
+ for (int i=0; i 0)
+ failure = true;
+ failCount = 0;
+ }
+
+ /**
+ * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
+ * supplementary characters. This method does NOT fully take care
+ * of the regex syntax.
+ */
+    private static String toSupplementaries(String s) {
+        final int n = s.length();
+        StringBuilder out = new StringBuilder(n * 2);
+
+        int pos = 0;
+        while (pos < n) {
+            char ch = s.charAt(pos++);
+
+            if (ch != '\\') {
+                boolean asciiLetter = (ch >= 'A' && ch <= 'Z')
+                                      || (ch >= 'a' && ch <= 'z');
+                if (asciiLetter) {
+                    // Replace the letter with a surrogate pair whose low
+                    // surrogate is offset by the letter's code.
+                    out.append('\ud800').append((char)('\udc00'+ch));
+                } else {
+                    out.append(ch);
+                }
+                continue;
+            }
+
+            // Backslash: copy it and the character it escapes unchanged.
+            out.append(ch);
+            if (pos >= n) {
+                continue;
+            }
+            ch = s.charAt(pos++);
+            out.append(ch);
+            if (ch == 'u') {
+                // assume no syntax error: copy the four characters after \u
+                for (int k = 0; k < 4; k++) {
+                    out.append(s.charAt(pos++));
+                }
+            }
+        }
+        return out.toString();
+    }
+
+ // Regular expression tests
+
+ // This is for bug 6178785
+ // Test if an expected NPE gets thrown when passing in a null argument
+    private static boolean check(Runnable test) {
+        // The test passes only if running it raises a NullPointerException;
+        // any other outcome that returns normally is counted as a failure.
+        boolean threwNpe = false;
+        try {
+            test.run();
+        } catch (NullPointerException expected) {
+            threwNpe = true;
+        }
+        if (!threwNpe) {
+            failCount++;
+        }
+        return threwNpe;
+    }
+
+    private static void nullArgumentTest() {
+        // Calls on Pattern that are each expected to throw an NPE.
+        Runnable[] patternCalls = {
+            new Runnable() { public void run() { Pattern.compile(null); }},
+            new Runnable() { public void run() { Pattern.matches(null, null); }},
+            new Runnable() { public void run() { Pattern.matches("xyz", null);}},
+            new Runnable() { public void run() { Pattern.quote(null);}},
+            new Runnable() { public void run() { Pattern.compile("xyz").split(null);}},
+            new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}},
+        };
+        for (Runnable call : patternCalls) {
+            check(call);
+        }
+
+        // Calls on a Matcher that has already performed a match.
+        final Matcher m = Pattern.compile("xyz").matcher("xyz");
+        m.matches();
+        Runnable[] matcherCalls = {
+            new Runnable() { public void run() { m.appendTail(null);}},
+            new Runnable() { public void run() { m.replaceAll(null);}},
+            new Runnable() { public void run() { m.replaceFirst(null);}},
+            new Runnable() { public void run() { m.appendReplacement(null, null);}},
+            new Runnable() { public void run() { m.reset(null);}},
+            new Runnable() { public void run() { Matcher.quoteReplacement(null);}},
+            //new Runnable() { public void run() { m.usePattern(null);}},
+        };
+        for (Runnable call : matcherCalls) {
+            check(call);
+        }
+
+        report("Null Argument");
+    }
+
+ // This is for bug 6635133
+ // Test if surrogate pair in Unicode escapes can be handled correctly.
+    private static void surrogatesInClassTest() throws Exception {
+        // A character-class range whose endpoints are written as escaped
+        // surrogate pairs must match a supplementary character inside it.
+        Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
+        Matcher matcher = pattern.matcher("\ud834\udd22");
+        if (!matcher.find())
+            failCount++;
+
+        // Report under this test's own name, as the other tests in this
+        // class do, so a failure here is not attributed to whichever test
+        // happens to call report() next.
+        report("Surrogate pair in Unicode escape");
+    }
+
+ // This is for bug 4988891
+ // Test toMatchResult to see that it is a copy of the Matcher
+ // that is not affected by subsequent operations on the original
+    private static void toMatchResultTest() throws Exception {
+        Matcher live = Pattern.compile("squid").matcher(
+            "agiantsquidofdestinyasmallsquidoffate");
+
+        // First match: the snapshot must be a different object that
+        // reports the same start index as the matcher.
+        live.find();
+        int liveStartFirst = live.start();
+        MatchResult snapshot = live.toMatchResult();
+        if (snapshot == live) {
+            failCount++;
+        }
+        int snapStartFirst = snapshot.start();
+        if (liveStartFirst != snapStartFirst) {
+            failCount++;
+        }
+
+        // Advance the matcher: the earlier snapshot must not follow it.
+        live.find();
+        int liveStartSecond = live.start();
+        int snapStartAfterAdvance = snapshot.start();
+        if (liveStartSecond == snapStartAfterAdvance) {
+            failCount++;
+        }
+        if (snapStartFirst != snapStartAfterAdvance) {
+            failCount++;
+        }
+
+        // A second snapshot is a new object that reflects the second match.
+        MatchResult secondSnapshot = live.toMatchResult();
+        if (snapshot == secondSnapshot) {
+            failCount++;
+        }
+        if (secondSnapshot.start() != liveStartSecond) {
+            failCount++;
+        }
+
+        report("toMatchResult is a copy");
+    }
+
+ // This is for bug 5013885
+ // Must test a slice to see if it reports hitEnd correctly
+    private static void hitEndTest() throws Exception {
+        // Basic test of Slice node: a mismatch before the end of input
+        // must not set hitEnd, running out of input must.
+        Pattern p = Pattern.compile("^squidattack");
+        Matcher m = p.matcher("squack");
+        m.find();
+        if (m.hitEnd())
+            failCount++;
+        m.reset("squid");
+        m.find();
+        if (!m.hitEnd())
+            failCount++;
+
+        // Test the case-sensitive, case-insensitive and Unicode
+        // case-insensitive slice variants. UNICODE_CASE only takes effect
+        // when CASE_INSENSITIVE is also set, so the third pass sets both.
+        for (int i=0; i<3; i++) {
+            int flags = 0;
+            if (i==1) flags = Pattern.CASE_INSENSITIVE;
+            if (i==2) flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+            p = Pattern.compile("^abc", flags);
+            m = p.matcher("ad");
+            m.find();
+            if (m.hitEnd())
+                failCount++;
+            m.reset("ab");
+            m.find();
+            if (!m.hitEnd())
+                failCount++;
+        }
+
+        // Test Boyer-Moore node
+        p = Pattern.compile("catattack");
+        m = p.matcher("attack");
+        m.find();
+        if (!m.hitEnd())
+            failCount++;
+
+        p = Pattern.compile("catattack");
+        m = p.matcher("attackattackattackcatatta");
+        m.find();
+        if (!m.hitEnd())
+            failCount++;
+
+        report("hitEnd from a Slice");
+    }
+
+ // This is for bug 4997476
+ // It is weird code submitted by customer demonstrating a regression
+    private static void wordSearchTest() throws Exception {
+        String text = new String("word1 word2 word3");
+        Matcher boundary = Pattern.compile("\\b").matcher(text);
+        final int end = text.length();
+
+        int next = 0;
+        while (boundary.find(next)) {
+            int from = boundary.start();
+            if (from == end) {
+                break;
+            }
+            // Locate the boundary that follows; fall back to end of text.
+            next = boundary.find(from + 1) ? boundary.start() : end;
+            if (" ".equals(text.substring(from, next))) {
+                continue;
+            }
+            // Anything between two boundaries that is not the separator
+            // must begin with "word".
+            if (!text.substring(from, next - 1).startsWith("word")) {
+                failCount++;
+            }
+        }
+        report("Customer word search");
+    }
+
+ // This is for bug 4994840
+    private static void caretAtEndTest() throws Exception {
+        // The problem needs a multiline pattern in which a
+        // beginning-of-line caret "^" is followed by an expression
+        // that can also match the empty string.
+        Matcher m = Pattern.compile("^x?", Pattern.MULTILINE).matcher("\r");
+        // Two consecutive searches; no result is checked here.
+        m.find();
+        m.find();
+        report("Caret at end");
+    }
+
+ // This test is for 4979006
+ // Check to see if word boundary construct properly handles unicode
+ // non spacing marks
+    private static void unicodeWordBoundsTest() throws Exception {
+        // Two spaces: every layout below is written "SS..." and the
+        // expected boundary indexes (2, 3, ...) assume a two-character
+        // prefix and suffix.
+        String spaces = "  ";
+        String wordChar = "a";
+        String nsm = "\u030a";
+
+        assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
+
+        Pattern pattern = Pattern.compile("\\b");
+        Matcher matcher = pattern.matcher("");
+        // S=other B=word character N=non spacing mark .=word boundary
+        // SS.BB.SS
+        String input = spaces + wordChar + wordChar + spaces;
+        twoFindIndexes(input, matcher, 2, 4);
+        // SS.BBN.SS
+        input = spaces + wordChar +wordChar + nsm + spaces;
+        twoFindIndexes(input, matcher, 2, 5);
+        // SS.BN.SS
+        input = spaces + wordChar + nsm + spaces;
+        twoFindIndexes(input, matcher, 2, 4);
+        // SS.BNN.SS
+        input = spaces + wordChar + nsm + nsm + spaces;
+        twoFindIndexes(input, matcher, 2, 5);
+        // SSN.BB.SS
+        input = spaces + nsm + wordChar + wordChar + spaces;
+        twoFindIndexes(input, matcher, 3, 5);
+        // SS.BNB.SS
+        input = spaces + wordChar + nsm + wordChar + spaces;
+        twoFindIndexes(input, matcher, 2, 5);
+        // SSNNSS: no word character at all, so no boundary may be found
+        input = spaces + nsm + nsm + spaces;
+        matcher.reset(input);
+        if (matcher.find())
+            failCount++;
+        // SSN.BBN.SS
+        input = spaces + nsm + wordChar + wordChar + nsm + spaces;
+        twoFindIndexes(input, matcher, 3, 6);
+
+        report("Unicode word boundary");
+    }
+
+    private static void twoFindIndexes(String input, Matcher matcher, int a,
+                                       int b) throws Exception
+    {
+        // Reset onto the new input, then require the first two matches
+        // to start at 'a' and 'b' respectively.
+        matcher.reset(input);
+        int[] expectedStarts = { a, b };
+        for (int k = 0; k < expectedStarts.length; k++) {
+            matcher.find();
+            if (matcher.start() != expectedStarts[k]) {
+                failCount++;
+            }
+        }
+    }
+
+ // This test is for 6284152
+    static void check(String regex, String input, String[] expected) {
+        // Collect every match of 'regex' in 'input', in order, and count a
+        // failure unless the matches equal 'expected' exactly.
+        List<String> result = new ArrayList<String>();
+        Pattern p = Pattern.compile(regex);
+        Matcher m = p.matcher(input);
+        while (m.find()) {
+            result.add(m.group());
+        }
+        if (!Arrays.asList(expected).equals(result))
+            failCount++;
+    }
+
+ private static void lookbehindTest() throws Exception {
+ //Positive
+ check("(?<=%.{0,5})foo\\d",
+ "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
+ new String[]{"foo1", "foo2", "foo3"});
+
+ //boundary at end of the lookbehind sub-regex should work consistently
+ //with the boundary just after the lookbehind sub-regex
+ check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
+ check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
+ check("(?]");
+ Matcher matcher = pattern.matcher("\u203A");
+ if (!matcher.matches())
+ failCount++;
+ pattern = Pattern.compile("[^fr]");
+ matcher = pattern.matcher("a");
+ if (!matcher.find())
+ failCount++;
+ matcher.reset("\u203A");
+ if (!matcher.find())
+ failCount++;
+ String s = "for";
+ String result[] = s.split("[^fr]");
+ if (!result[0].equals("f"))
+ failCount++;
+ if (!result[1].equals("r"))
+ failCount++;
+ s = "f\u203Ar";
+ result = s.split("[^fr]");
+ if (!result[0].equals("f"))
+ failCount++;
+ if (!result[1].equals("r"))
+ failCount++;
+
+ // Test adding to bits, subtracting a node, then adding to bits again
+ pattern = Pattern.compile("[^f\u203Ar]");
+ matcher = pattern.matcher("a");
+ if (!matcher.find())
+ failCount++;
+ matcher.reset("f");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("\u203A");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("r");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("\u203B");
+ if (!matcher.find())
+ failCount++;
+
+ // Test subtracting a node, adding to bits, subtracting again
+ pattern = Pattern.compile("[^\u203Ar\u203B]");
+ matcher = pattern.matcher("a");
+ if (!matcher.find())
+ failCount++;
+ matcher.reset("\u203A");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("r");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("\u203B");
+ if (matcher.find())
+ failCount++;
+ matcher.reset("\u203C");
+ if (!matcher.find())
+ failCount++;
+
+ report("Negated Character Class");
+ }
+
+ // This test is for 4628291
+ // Verifies that Pattern.toString() gives back the expression the pattern
+ // was compiled from, and calls Matcher.toString() in four matcher states
+ // (fresh, after find(), after region(), after reset()). The matcher text is
+ // unspecified, so it is only invoked, never inspected.
+ private static void toStringTest() throws Exception {
+     Pattern pattern = Pattern.compile("b+");
+     // Compare by value; != only tested whether the very same String
+     // instance was returned.
+     if (!"b+".equals(pattern.toString()))
+         failCount++;
+     Matcher matcher = pattern.matcher("aaabbbccc");
+     matcher.toString(); // unspecified
+     matcher.find();
+     matcher.toString(); // unspecified
+     matcher.region(0,3);
+     matcher.toString(); // unspecified
+     matcher.reset();
+     matcher.toString(); // unspecified
+     report("toString");
+ }
+
+ // This test is for 4808962
+ // Exercises Pattern.LITERAL and Pattern.quote(): each case compiles a
+ // pattern and hands it to check(pattern, input, expected). LITERAL is then
+ // combined step by step with MULTILINE, CASE_INSENSITIVE, DOTALL and
+ // CANON_EQ, and the sequence is repeated with toSupplementaries() input.
+ private static void literalPatternTest() throws Exception {
+ int flags = Pattern.LITERAL;
+
+ // Metacharacters and escapes compiled with LITERAL or quoted
+ Pattern pattern = Pattern.compile("abc\\t$^", flags);
+ check(pattern, "abc\\t$^", true);
+
+ pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
+ check(pattern, "abc\\t$^", true);
+
+ // With LITERAL the \Q...\E markers are part of the text to match
+ pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
+ check(pattern, "\\Qa^$bcabc\\E", true);
+ check(pattern, "a^$bcabc", false);
+
+ // No LITERAL flag here: escaped backslashes in front of Q and E
+ pattern = Pattern.compile("\\\\Q\\\\E");
+ check(pattern, "\\Q\\E", true);
+
+ pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
+ check(pattern, "abcefg\\Q\\Ehij", true);
+
+ pattern = Pattern.compile("\\\\\\Q\\\\E");
+ check(pattern, "\\\\\\\\", true);
+
+ // quote() applied to text that itself contains \Q and/or \E
+ pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
+ check(pattern, "\\Qa^$bcabc\\E", true);
+ check(pattern, "a^$bcabc", false);
+
+ pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
+ check(pattern, "\\Qabc\\Edef", true);
+ check(pattern, "abcdef", false);
+
+ pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
+ check(pattern, "abc\\Edef", true);
+ check(pattern, "abcdef", false);
+
+ pattern = Pattern.compile(Pattern.quote("\\E"));
+ check(pattern, "\\E", true);
+
+ // Unbalanced parentheses and quantifier characters under LITERAL
+ pattern = Pattern.compile("((((abc.+?:)", flags);
+ check(pattern, "((((abc.+?:)", true);
+
+ flags |= Pattern.MULTILINE;
+
+ pattern = Pattern.compile("^cat$", flags);
+ check(pattern, "abc^cat$def", true);
+ check(pattern, "cat", false);
+
+ flags |= Pattern.CASE_INSENSITIVE;
+
+ pattern = Pattern.compile("abcdef", flags);
+ check(pattern, "ABCDEF", true);
+ check(pattern, "AbCdEf", true);
+
+ flags |= Pattern.DOTALL;
+
+ pattern = Pattern.compile("a...b", flags);
+ check(pattern, "A...b", true);
+ check(pattern, "Axxxb", false);
+
+ flags |= Pattern.CANON_EQ;
+
+ // NOTE(review): p is compiled here but never used; the two checks below
+ // still run against the previous "a...b" pattern. This looks like a
+ // copy-paste slip - confirm the intended expectations before switching
+ // the checks to p.
+ Pattern p = Pattern.compile("testa\u030a", flags);
+ check(pattern, "testa\u030a", false);
+ check(pattern, "test\u00e5", false);
+
+ // Supplementary character test
+ flags = Pattern.LITERAL;
+
+ pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
+ check(pattern, toSupplementaries("abc\\t$^"), true);
+
+ pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
+ check(pattern, toSupplementaries("abc\\t$^"), true);
+
+ pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
+ check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
+ check(pattern, toSupplementaries("a^$bcabc"), false);
+
+ pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
+ check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
+ check(pattern, toSupplementaries("a^$bcabc"), false);
+
+ pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
+ check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
+ check(pattern, toSupplementaries("abcdef"), false);
+
+ pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
+ check(pattern, toSupplementaries("abc\\Edef"), true);
+ check(pattern, toSupplementaries("abcdef"), false);
+
+ pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
+ check(pattern, toSupplementaries("((((abc.+?:)"), true);
+
+ flags |= Pattern.MULTILINE;
+
+ pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
+ check(pattern, toSupplementaries("abc^cat$def"), true);
+ check(pattern, toSupplementaries("cat"), false);
+
+ flags |= Pattern.DOTALL;
+
+ // note: this is case-sensitive.
+ pattern = Pattern.compile(toSupplementaries("a...b"), flags);
+ check(pattern, toSupplementaries("a...b"), true);
+ check(pattern, toSupplementaries("axxxb"), false);
+
+ flags |= Pattern.CANON_EQ;
+
+ // NOTE(review): same situation as in the BMP section - p is assigned
+ // but the checks below use pattern. Confirm which was intended.
+ String t = toSupplementaries("test");
+ p = Pattern.compile(t + "a\u030a", flags);
+ check(pattern, t + "a\u030a", false);
+ check(pattern, t + "\u00e5", false);
+
+ report("Literal pattern");
+ }
+
+ // This test is for 4803179
+ // This test is also for 4808962, replacement parts
+ // Checks replacement strings with and without Matcher.quoteReplacement():
+ // an unquoted "$0" is a group reference, while the quoted form of "$0" and
+ // of "\\t$\\$" is inserted verbatim. Repeated with toSupplementaries() input.
+ private static void literalReplacementTest() throws Exception {
+     int flags = Pattern.LITERAL;
+
+     Pattern pattern = Pattern.compile("abc", flags);
+     Matcher matcher = pattern.matcher("zzzabczzz");
+     String replaceTest = "$0";
+     String result = matcher.replaceAll(replaceTest);
+     if (!result.equals("zzzabczzz"))
+         failCount++;
+
+     matcher.reset();
+     // quoteReplacement is static, so it is called through the class
+     // rather than through the matcher instance.
+     String literalReplacement = Matcher.quoteReplacement(replaceTest);
+     result = matcher.replaceAll(literalReplacement);
+     if (!result.equals("zzz$0zzz"))
+         failCount++;
+
+     matcher.reset();
+     replaceTest = "\\t$\\$";
+     literalReplacement = Matcher.quoteReplacement(replaceTest);
+     result = matcher.replaceAll(literalReplacement);
+     if (!result.equals("zzz\\t$\\$zzz"))
+         failCount++;
+
+     // Supplementary character test
+     pattern = Pattern.compile(toSupplementaries("abc"), flags);
+     matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
+     replaceTest = "$0";
+     result = matcher.replaceAll(replaceTest);
+     if (!result.equals(toSupplementaries("zzzabczzz")))
+         failCount++;
+
+     matcher.reset();
+     literalReplacement = Matcher.quoteReplacement(replaceTest);
+     result = matcher.replaceAll(literalReplacement);
+     if (!result.equals(toSupplementaries("zzz$0zzz")))
+         failCount++;
+
+     matcher.reset();
+     replaceTest = "\\t$\\$";
+     literalReplacement = Matcher.quoteReplacement(replaceTest);
+     result = matcher.replaceAll(literalReplacement);
+     if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
+         failCount++;
+
+     report("Literal replacement");
+ }
+
+ // This test is for 4757029
+ // Exercises Matcher.region(): find() inside various regions, invalid
+ // bounds via expectRegionFail(), and the ^...$ anchors with and without
+ // anchoring bounds. The second half repeats the sequence on
+ // toSupplementaries() input with the indexes scaled by two. The statements
+ // depend on the matcher state left by the previous call, so their order
+ // matters.
+ private static void regionTest() throws Exception {
+ Pattern pattern = Pattern.compile("abc");
+ Matcher matcher = pattern.matcher("abcdefabc");
+
+ // Whole input: "abc" occurs twice
+ matcher.region(0,9);
+ if (!matcher.find())
+ failCount++;
+ if (!matcher.find())
+ failCount++;
+ // Region covering exactly the first "abc"
+ matcher.region(0,3);
+ if (!matcher.find())
+ failCount++;
+ // Region covering "def" only
+ matcher.region(3,6);
+ if (matcher.find())
+ failCount++;
+ // Region too short to hold "abc"
+ matcher.region(0,2);
+ if (matcher.find())
+ failCount++;
+
+ // Negative, reversed or out-of-range bounds (input length is 9)
+ expectRegionFail(matcher, 1, -1);
+ expectRegionFail(matcher, -1, -1);
+ expectRegionFail(matcher, -1, 1);
+ expectRegionFail(matcher, 5, 3);
+ expectRegionFail(matcher, 5, 12);
+ expectRegionFail(matcher, 12, 12);
+
+ pattern = Pattern.compile("^abc$");
+ matcher = pattern.matcher("zzzabczzz");
+ matcher.region(0,9);
+ if (matcher.find())
+ failCount++;
+ // Region is exactly "abc": a match is expected here
+ matcher.region(3,6);
+ if (!matcher.find())
+ failCount++;
+ // Same region with anchoring bounds switched off: no match expected
+ matcher.region(3,6);
+ matcher.useAnchoringBounds(false);
+ if (matcher.find())
+ failCount++;
+
+ // Supplementary character test
+ pattern = Pattern.compile(toSupplementaries("abc"));
+ matcher = pattern.matcher(toSupplementaries("abcdefabc"));
+ matcher.region(0,9*2);
+ if (!matcher.find())
+ failCount++;
+ if (!matcher.find())
+ failCount++;
+ matcher.region(0,3*2);
+ if (!matcher.find())
+ failCount++;
+ // Odd start index 1: no match expected
+ matcher.region(1,3*2);
+ if (matcher.find())
+ failCount++;
+ matcher.region(3*2,6*2);
+ if (matcher.find())
+ failCount++;
+ matcher.region(0,2*2);
+ if (matcher.find())
+ failCount++;
+ // Odd end index 2*2+1: no match expected
+ matcher.region(0,2*2+1);
+ if (matcher.find())
+ failCount++;
+
+ expectRegionFail(matcher, 1*2, -1);
+ expectRegionFail(matcher, -1, -1);
+ expectRegionFail(matcher, -1, 1*2);
+ expectRegionFail(matcher, 5*2, 3*2);
+ expectRegionFail(matcher, 5*2, 12*2);
+ expectRegionFail(matcher, 12*2, 12*2);
+
+ pattern = Pattern.compile(toSupplementaries("^abc$"));
+ matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
+ matcher.region(0,9*2);
+ if (matcher.find())
+ failCount++;
+ matcher.region(3*2,6*2);
+ if (!matcher.find())
+ failCount++;
+ // Region start or end moved by one char from the matching region
+ matcher.region(3*2+1,6*2);
+ if (matcher.find())
+ failCount++;
+ matcher.region(3*2,6*2-1);
+ if (matcher.find())
+ failCount++;
+ matcher.region(3*2,6*2);
+ matcher.useAnchoringBounds(false);
+ if (matcher.find())
+ failCount++;
+ report("Regions");
+ }
+
+ // Calls matcher.region(index1, index2) and counts a failure when the call
+ // returns normally. An IndexOutOfBoundsException or IllegalStateException
+ // is the accepted outcome; any other exception propagates to the caller.
+ private static void expectRegionFail(Matcher matcher, int index1,
+                                      int index2)
+ {
+     boolean rejected = false;
+     try {
+         matcher.region(index1, index2);
+     } catch (IndexOutOfBoundsException expected) {
+         rejected = true;
+     } catch (IllegalStateException expected) {
+         rejected = true;
+     }
+     if (!rejected)
+         failCount++;
+ }
+
+ // This test is for 4803197
+ // Checks \Q...\E quoted segments that contain backslashes, first with
+ // plain input and then with toSupplementaries() input. Every case is a
+ // { regex, input } pair handed to check(pattern, input, true).
+ private static void escapedSegmentTest() throws Exception {
+     String[][] bmpCases = {
+         { "\\Qdir1\\dir2\\E",     "dir1\\dir2" },
+         { "\\Qdir1\\dir2\\\\E",   "dir1\\dir2\\" },
+         { "(\\Qdir1\\dir2\\\\E)", "dir1\\dir2\\" },
+     };
+     for (String[] c : bmpCases)
+         check(Pattern.compile(c[0]), c[1], true);
+
+     // Supplementary character test
+     String[][] suppCases = {
+         { toSupplementaries("\\Qdir1\\dir2\\E"),
+           toSupplementaries("dir1\\dir2") },
+         { toSupplementaries("\\Qdir1\\dir2")+"\\\\E",
+           toSupplementaries("dir1\\dir2\\") },
+         { toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)",
+           toSupplementaries("dir1\\dir2\\") },
+     };
+     for (String[] c : suppCases)
+         check(Pattern.compile(c[0]), c[1], true);
+
+     report("Escaped segment");
+ }
+
+ // This test is for 4792284
+ // Runs a list of repetition patterns (mostly over non-capturing groups)
+ // against "abcdefgh;". Each pattern goes through the five-argument check()
+ // and must also match the entire input via matches().
+ private static void nonCaptureRepetitionTest() throws Exception {
+     String input = "abcdefgh;";
+
+     String[] patterns = new String[] {
+         "(?:\\w{4})+;",
+         "(?:\\w{8})*;",
+         "(?:\\w{2}){2,4};",
+         "(?:\\w{4}){2,};", // only matches the
+         ".*?(?:\\w{5})+;", // specified minimum
+         ".*?(?:\\w{9})*;", // number of reps - OK
+         "(?:\\w{4})+?;", // lazy repetition - OK
+         "(?:\\w{4})++;", // possessive repetition - OK
+         "(?:\\w{2,}?)+;", // non-deterministic - OK
+         "(\\w{4})+;", // capturing group - OK
+     };
+
+     for (String regex : patterns) {
+         // Check find()
+         check(regex, 0, input, input, true);
+         // Check matches(): it must succeed and group 0 must be the input
+         Matcher m = Pattern.compile(regex).matcher(input);
+         if (!m.matches() || !m.group(0).equals(input))
+             failCount++;
+     }
+
+     report("Non capturing repetition");
+ }
+
+ // This test is for 6358731
+ // For "(abc)+|(abcd)+" matched against "abcd": the whole input must match,
+ // group 1 must be null and group 2 must be "abcd".
+ private static void notCapturedGroupCurlyMatchTest() throws Exception {
+     Matcher m = Pattern.compile("(abc)+|(abcd)+").matcher("abcd");
+     boolean ok = m.matches()
+                  && m.group(1) == null
+                  && m.group(2).equals("abcd");
+     if (!ok)
+         failCount++;
+     report("Not captured GroupCurly");
+ }
+
+ // This test is for 4706545
+ // Compares each \p{javaXxx} character class with the java.lang.Character
+ // predicate named in the same check() call, for 1000 random chars and for
+ // 1000 random code points at or above Character.MIN_SUPPLEMENTARY_CODE_POINT.
+ //
+ // The class names carry the \p prefix: a bare "{javaLowerCase}" starts with
+ // a repetition brace and is rejected by Pattern.compile.
+ private static void javaCharClassTest() throws Exception {
+     for (int i=0; i<1000; i++) {
+         char c = (char)generator.nextInt();
+         check("\\p{javaLowerCase}", c, Character.isLowerCase(c));
+         check("\\p{javaUpperCase}", c, Character.isUpperCase(c));
+         check("\\p{javaUpperCase}+", c, Character.isUpperCase(c));
+         check("\\p{javaTitleCase}", c, Character.isTitleCase(c));
+         check("\\p{javaDigit}", c, Character.isDigit(c));
+         check("\\p{javaDefined}", c, Character.isDefined(c));
+         check("\\p{javaLetter}", c, Character.isLetter(c));
+         check("\\p{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
+         check("\\p{javaJavaIdentifierStart}", c,
+               Character.isJavaIdentifierStart(c));
+         check("\\p{javaJavaIdentifierPart}", c,
+               Character.isJavaIdentifierPart(c));
+         check("\\p{javaUnicodeIdentifierStart}", c,
+               Character.isUnicodeIdentifierStart(c));
+         check("\\p{javaUnicodeIdentifierPart}", c,
+               Character.isUnicodeIdentifierPart(c));
+         check("\\p{javaIdentifierIgnorable}", c,
+               Character.isIdentifierIgnorable(c));
+         check("\\p{javaSpaceChar}", c, Character.isSpaceChar(c));
+         check("\\p{javaWhitespace}", c, Character.isWhitespace(c));
+         check("\\p{javaISOControl}", c, Character.isISOControl(c));
+         check("\\p{javaMirrored}", c, Character.isMirrored(c));
+
+     }
+
+     // Supplementary character test
+     for (int i=0; i<1000; i++) {
+         int c = generator.nextInt(Character.MAX_CODE_POINT
+                                   - Character.MIN_SUPPLEMENTARY_CODE_POINT)
+                 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
+         check("\\p{javaLowerCase}", c, Character.isLowerCase(c));
+         check("\\p{javaUpperCase}", c, Character.isUpperCase(c));
+         check("\\p{javaUpperCase}+", c, Character.isUpperCase(c));
+         check("\\p{javaTitleCase}", c, Character.isTitleCase(c));
+         check("\\p{javaDigit}", c, Character.isDigit(c));
+         check("\\p{javaDefined}", c, Character.isDefined(c));
+         check("\\p{javaLetter}", c, Character.isLetter(c));
+         check("\\p{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
+         check("\\p{javaJavaIdentifierStart}", c,
+               Character.isJavaIdentifierStart(c));
+         check("\\p{javaJavaIdentifierPart}", c,
+               Character.isJavaIdentifierPart(c));
+         check("\\p{javaUnicodeIdentifierStart}", c,
+               Character.isUnicodeIdentifierStart(c));
+         check("\\p{javaUnicodeIdentifierPart}", c,
+               Character.isUnicodeIdentifierPart(c));
+         check("\\p{javaIdentifierIgnorable}", c,
+               Character.isIdentifierIgnorable(c));
+         check("\\p{javaSpaceChar}", c, Character.isSpaceChar(c));
+         check("\\p{javaWhitespace}", c, Character.isWhitespace(c));
+         check("\\p{javaISOControl}", c, Character.isISOControl(c));
+         check("\\p{javaMirrored}", c, Character.isMirrored(c));
+     }
+
+     report("Java character classes");
+ }
+
+ // This test is for 4523620
+ /*
+ private static void numOccurrencesTest() throws Exception {
+ Pattern pattern = Pattern.compile("aaa");
+
+ if (pattern.numOccurrences("aaaaaa", false) != 2)
+ failCount++;
+ if (pattern.numOccurrences("aaaaaa", true) != 4)
+ failCount++;
+
+ pattern = Pattern.compile("^");
+ if (pattern.numOccurrences("aaaaaa", false) != 1)
+ failCount++;
+ if (pattern.numOccurrences("aaaaaa", true) != 1)
+ failCount++;
+
+ report("Number of Occurrences");
+ }
+ */
+
+ // This test is for 4776374
+ // Checks where '^' may match relative to line terminators under four flag
+ // sets: DOTALL alone, with UNIX_LINES, with UNIX_LINES and MULTILINE, and
+ // with MULTILINE. Each case goes through the five-argument check(); the
+ // sequence is repeated with toSupplementaries("test") as the word.
+ //
+ // In each flags4 group the ".....^" case on "test\ntest" uses flags4; it
+ // used to pass flags3 and merely repeated a flags3 case.
+ private static void caretBetweenTerminatorsTest() throws Exception {
+     int flags1 = Pattern.DOTALL;
+     int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
+     int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
+     int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
+
+     check("^....", flags1, "test\ntest", "test", true);
+     check(".....^", flags1, "test\ntest", "test", false);
+     check(".....^", flags1, "test\n", "test", false);
+     check("....^", flags1, "test\r\n", "test", false);
+
+     check("^....", flags2, "test\ntest", "test", true);
+     check("....^", flags2, "test\ntest", "test", false);
+     check(".....^", flags2, "test\n", "test", false);
+     check("....^", flags2, "test\r\n", "test", false);
+
+     check("^....", flags3, "test\ntest", "test", true);
+     check(".....^", flags3, "test\ntest", "test\n", true);
+     check(".....^", flags3, "test\u0085test", "test\u0085", false);
+     check(".....^", flags3, "test\n", "test", false);
+     check(".....^", flags3, "test\r\n", "test", false);
+     check("......^", flags3, "test\r\ntest", "test\r\n", true);
+
+     check("^....", flags4, "test\ntest", "test", true);
+     // '\n' is a line terminator with or without UNIX_LINES, so the
+     // expected match is the same as for flags3.
+     check(".....^", flags4, "test\ntest", "test\n", true);
+     check(".....^", flags4, "test\u0085test", "test\u0085", true);
+     check(".....^", flags4, "test\n", "test\n", false);
+     check(".....^", flags4, "test\r\n", "test\r", false);
+
+     // Supplementary character test
+     String t = toSupplementaries("test");
+     check("^....", flags1, t+"\n"+t, t, true);
+     check(".....^", flags1, t+"\n"+t, t, false);
+     check(".....^", flags1, t+"\n", t, false);
+     check("....^", flags1, t+"\r\n", t, false);
+
+     check("^....", flags2, t+"\n"+t, t, true);
+     check("....^", flags2, t+"\n"+t, t, false);
+     check(".....^", flags2, t+"\n", t, false);
+     check("....^", flags2, t+"\r\n", t, false);
+
+     check("^....", flags3, t+"\n"+t, t, true);
+     check(".....^", flags3, t+"\n"+t, t+"\n", true);
+     check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
+     check(".....^", flags3, t+"\n", t, false);
+     check(".....^", flags3, t+"\r\n", t, false);
+     check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
+
+     check("^....", flags4, t+"\n"+t, t, true);
+     check(".....^", flags4, t+"\n"+t, t+"\n", true);
+     check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
+     check(".....^", flags4, t+"\n", t+"\n", false);
+     check(".....^", flags4, t+"\r\n", t+"\r", false);
+
+     report("Caret between terminators");
+ }
+
+ // This test is for 4727935
+ private static void dollarAtEndTest() throws Exception {
+ int flags1 = Pattern.DOTALL;
+ int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
+ int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
+
+ check("....$", flags1, "test\n", "test", true);
+ check("....$", flags1, "test\r\n", "test", true);
+ check(".....$", flags1, "test\n", "test\n", true);
+ check(".....$", flags1, "test\u0085", "test\u0085", true);
+ check("....$", flags1, "test\u0085", "test", true);
+
+ check("....$", flags2, "test\n", "test", true);
+ check(".....$", flags2, "test\n", "test\n", true);
+ check(".....$", flags2, "test\u0085", "test\u0085", true);
+ check("....$", flags2, "test\u0085", "est\u0085", true);
+
+ check("....$.blah", flags3, "test\nblah", "test\nblah", true);
+ check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
+ check("....$blah", flags3, "test\nblah", "!!!!", false);
+ check(".....$blah", flags3, "test\nblah", "!!!!", false);
+
+ // Supplementary character test
+ String t = toSupplementaries("test");
+ String b = toSupplementaries("blah");
+ check("....$", flags1, t+"\n", t, true);
+ check("....$", flags1, t+"\r\n", t, true);
+ check(".....$", flags1, t+"\n", t+"\n", true);
+ check(".....$", flags1, t+"\u0085", t+"\u0085", true);
+ check("....$", flags1, t+"\u0085", t, true);
+
+ check("....$", flags2, t+"\n", t, true);
+ check(".....$", flags2, t+"\n", t+"\n", true);
+ check(".....$", flags2, t+"\u0085", t+"\u0085", true);
+ check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
+
+ check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
+ check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
+ check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
+ check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
+
+ report("Dollar at End");
+ }
+
+ // This test is for 4711773
+ // Checks the two positions at which "$" matches in MULTILINE mode for a
+ // two-line input: before the newline (index 9) and at the end of input
+ // (index 20), then the doubled indexes for supplementary input.
+ //
+ // A find() that does not match is counted as a failure; calling start()
+ // after a failed find() would throw IllegalStateException instead.
+ private static void multilineDollarTest() throws Exception {
+     Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
+     Matcher matcher = findCR.matcher("first bit\nsecond bit");
+     if (!matcher.find() || matcher.start(0) != 9)
+         failCount++;
+     if (!matcher.find() || matcher.start(0) != 20)
+         failCount++;
+
+     // Supplementary character test
+     // The expected indexes are exactly twice the BMP ones, so characters
+     // that stay one char wide must appear twice in the literal: each space
+     // is doubled and the newline is followed by one padding space.
+     // NOTE(review): assumes toSupplementaries() widens only letters and
+     // digits and leaves spaces and '\n' unchanged - confirm against it.
+     matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
+     if (!matcher.find() || matcher.start(0) != 9*2)
+         failCount++;
+     if (!matcher.find() || matcher.start(0) != 20*2)
+         failCount++;
+
+     report("Multiline Dollar");
+ }
+
+ // Exercises reluctant quantifiers: a bounded reluctant group repetition
+ // checked against seven inputs, then "([a-z])+?c" checked through
+ // check(matcher, expected) for plain and toSupplementaries() input.
+ private static void reluctantRepetitionTest() throws Exception {
+     Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
+     String[] inputs = {
+         "1 word word word 2",
+         "1 wor wo w 2",
+         "1 word word 2",
+         "1 word 2",
+         "1 wo w w 2",
+         "1 wo w 2",
+         "1 wor w 2",
+     };
+     for (String text : inputs)
+         check(p, text, true);
+
+     Matcher m = Pattern.compile("([a-z])+?c").matcher("ababcdefdec");
+     check(m, "ababc");
+
+     // Supplementary character test
+     m = Pattern.compile(toSupplementaries("([a-z])+?c"))
+                .matcher(toSupplementaries("ababcdefdec"));
+     check(m, toSupplementaries("ababc"));
+
+     report("Reluctant Repetition");
+ }
+
+ // Writes a compiled Pattern to an in-memory object stream, reads it back
+ // and checks that the restored pattern matches "b" and reports one group.
+ private static void serializeTest() throws Exception {
+     Pattern original = Pattern.compile("(b)");
+
+     // Serialize into a byte buffer
+     ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+     ObjectOutputStream out = new ObjectOutputStream(bytes);
+     out.writeObject(original);
+     out.close();
+
+     // Deserialize from the same bytes
+     ByteArrayInputStream source = new ByteArrayInputStream(bytes.toByteArray());
+     ObjectInputStream in = new ObjectInputStream(source);
+     Pattern restored = (Pattern)in.readObject();
+     in.close();
+
+     Matcher matcher = restored.matcher("b");
+     if (!matcher.matches())
+         failCount++;
+     if (matcher.groupCount() != 1)
+         failCount++;
+
+     report("Serialization");
+ }
+
+ // Exercises the \G boundary: after a fixed number of find() calls the
+ // next find() must fail, both when scanning from the start and after a
+ // positioned find(1).
+ private static void gTest() {
+     // Three finds are performed; a fourth must not succeed
+     Matcher m = Pattern.compile("\\G\\w").matcher("abc#x#x");
+     m.find();
+     m.find();
+     m.find();
+     if (m.find())
+         failCount++;
+
+     // One find is performed; a second must not succeed
+     m = Pattern.compile("\\GA*").matcher("1A2AA3");
+     m.find();
+     if (m.find())
+         failCount++;
+
+     // Same pattern and input, starting with find(1), which must succeed
+     m = Pattern.compile("\\GA*").matcher("1A2AA3");
+     if (!m.find(1))
+         failCount++;
+     m.find();
+     if (m.find())
+         failCount++;
+
+     report("\\G");
+ }
+
+ // Checks "foo\\Z" against inputs with various trailing characters, with
+ // default line terminators and then with UNIX_LINES. Each input goes
+ // through check(pattern, input, expected).
+ private static void zTest() {
+     Pattern pattern = Pattern.compile("foo\\Z");
+     String[] positives = {
+         "foo\u0085", "foo\u2028", "foo\u2029", "foo\n", "foo\r", "foo\r\n"
+     };
+     String[] negatives = { "fooo", "foo\n\r" };
+     for (String text : positives)
+         check(pattern, text, true);
+     for (String text : negatives)
+         check(pattern, text, false);
+
+     pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
+     String[] unixPositives = { "foo", "foo\n" };
+     String[] unixNegatives = {
+         "foo\r", "foo\u0085", "foo\u2028", "foo\u2029"
+     };
+     for (String text : unixPositives)
+         check(pattern, text, true);
+     for (String text : unixNegatives)
+         check(pattern, text, false);
+
+     report("\\Z");
+ }
+
+ private static void replaceFirstTest() {
+ Pattern pattern = Pattern.compile("(ab)(c*)");
+ Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
+ if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
+ failCount++;
+
+ matcher.reset("zzzabccczzzabcczzzabccczzz");
+ if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
+ failCount++;
+
+ matcher.reset("zzzabccczzzabcczzzabccczzz");
+ String result = matcher.replaceFirst("$1");
+ if (!result.equals("zzzabzzzabcczzzabccczzz"))
+ failCount++;
+
+ matcher.reset("zzzabccczzzabcczzzabccczzz");
+ result = matcher.replaceFirst("$2");
+ if (!result.equals("zzzccczzzabcczzzabccczzz"))
+ failCount++;
+
+ pattern = Pattern.compile("a*");
+ matcher = pattern.matcher("aaaaaaaaaa");
+ if (!matcher.replaceFirst("test").equals("test"))
+ failCount++;
+
+ pattern = Pattern.compile("a+");
+ matcher = pattern.matcher("zzzaaaaaaaaaa");
+ if (!matcher.replaceFirst("test").equals("zzztest"))
+ failCount++;
+
+ // Supplementary character test
+ pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
+ matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
+ if (!matcher.replaceFirst(toSupplementaries("test"))
+ .equals(toSupplementaries("testzzzabcczzzabccc")))
+ failCount++;
+
+ matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+ if (!matcher.replaceFirst(toSupplementaries("test")).
+ equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
+ failCount++;
+
+ matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+ result = matcher.replaceFirst("$1");
+ if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
+ failCount++;
+
+ matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+ result = matcher.replaceFirst("$2");
+ if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
+ failCount++;
+
+ pattern = Pattern.compile(toSupplementaries("a*"));
+ matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
+ if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
+ failCount++;
+
+ pattern = Pattern.compile(toSupplementaries("a+"));
+ matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
+ if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
+ failCount++;
+
+ report("Replace First");
+ }
+
+ private static void unixLinesTest() {
+ Pattern pattern = Pattern.compile(".*");
+ Matcher matcher = pattern.matcher("aa\u2028blah");
+ matcher.find();
+ if (!matcher.group(0).equals("aa"))
+ failCount++;
+
+ pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
+ matcher = pattern.matcher("aa\u2028blah");
+ matcher.find();
+ if (!matcher.group(0).equals("aa\u2028blah"))
+ failCount++;
+
+ pattern = Pattern.compile("[az]$",
+ Pattern.MULTILINE | Pattern.UNIX_LINES);
+ matcher = pattern.matcher("aa\u2028zz");
+ check(matcher, "a\u2028", false);
+
+ // Supplementary character test
+ pattern = Pattern.compile(".*");
+ matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
+ matcher.find();
+ if (!matcher.group(0).equals(toSupplementaries("aa")))
+ failCount++;
+
+ pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
+ matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
+ matcher.find();
+ if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
+ failCount++;
+
+ pattern = Pattern.compile(toSupplementaries("[az]$"),
+ Pattern.MULTILINE | Pattern.UNIX_LINES);
+ matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
+ check(matcher, toSupplementaries("a\u2028"), false);
+
+ report("Unix Lines");
+ }
+
+ // Exercises Pattern.COMMENTS: every { regex, input } pair below must match
+ // in full, first as written and then with both strings passed through
+ // toSupplementaries().
+ private static void commentsTest() {
+     int flags = Pattern.COMMENTS;
+
+     String[][] cases = {
+         { "aa \\# aa",               "aa#aa" },
+         { "aa # blah",               "aa" },
+         { "aa blah",                 "aablah" },
+         { "aa # blah blech ",        "aa" },
+         { "aa # blah\n ",            "aa" },
+         { "aa # blah\nbc # blech",   "aabc" },
+         { "aa # blah\nbc# blech",    "aabc" },
+         { "aa # blah\nbc\\# blech",  "aabc#blech" },
+     };
+
+     for (String[] c : cases) {
+         Matcher m = Pattern.compile(c[0], flags).matcher(c[1]);
+         if (!m.matches())
+             failCount++;
+     }
+
+     // Supplementary character test
+     for (String[] c : cases) {
+         Pattern p = Pattern.compile(toSupplementaries(c[0]), flags);
+         Matcher m = p.matcher(toSupplementaries(c[1]));
+         if (!m.matches())
+             failCount++;
+     }
+
+     report("Comments");
+ }
+
+ // Case-folding checks (bug 4504687). First, pairs that differ in a letter
+ // and not only in case must not match. Then a table of patterns and texts
+ // is run under three flag sets: CASE_INSENSITIVE alone (only the entries
+ // marked true in the table may match), CASE_INSENSITIVE | UNICODE_CASE
+ // (all must match) and UNICODE_CASE alone (none may match). Finally the
+ // special cases i, I, u+0131 and u+0130 are checked.
+ private static void caseFoldingTest() { // bug 4504687
+     int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+
+     // { regex, text } pairs that must not match
+     String[][] mismatches = {
+         { "aa", "ab" },
+         { "aA", "ab" },
+         { "aa", "aB" },
+         { "aa", "Ab" },
+     };
+     for (String[] pair : mismatches) {
+         if (Pattern.compile(pair[0], flags).matcher(pair[1]).matches())
+             failCount++;
+     }
+
+     // ASCII "a"
+     // Latin-1 Supplement "a" + grave
+     // Cyrillic "a"
+     String[] patterns = new String[] {
+         //single
+         "a", "\u00e0", "\u0430",
+         //slice
+         "ab", "\u00e0\u00e1", "\u0430\u0431",
+         //class single
+         "[a]", "[\u00e0]", "[\u0430]",
+         //class range
+         "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
+         //back reference
+         "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
+     };
+
+     String[] texts = new String[] {
+         "A", "\u00c0", "\u0410",
+         "AB", "\u00c0\u00c1", "\u0410\u0411",
+         "A", "\u00c0", "\u0410",
+         "B", "\u00c2", "\u0411",
+         "aA", "\u00e0\u00c0", "\u0430\u0410"
+     };
+
+     boolean[] expected = new boolean[] {
+         true, false, false,
+         true, false, false,
+         true, false, false,
+         true, false, false,
+         true, false, false
+     };
+
+     flags = Pattern.CASE_INSENSITIVE;
+     for (int i = 0; i < patterns.length; i++) {
+         boolean matched =
+             Pattern.compile(patterns[i], flags).matcher(texts[i]).matches();
+         if (matched != expected[i]) {
+             System.out.println("<1> Failed at " + i);
+             failCount++;
+         }
+     }
+
+     flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+     for (int i = 0; i < patterns.length; i++) {
+         boolean matched =
+             Pattern.compile(patterns[i], flags).matcher(texts[i]).matches();
+         if (!matched) {
+             System.out.println("<2> Failed at " + i);
+             failCount++;
+         }
+     }
+
+     // flag unicode_case alone should do nothing
+     flags = Pattern.UNICODE_CASE;
+     for (int i = 0; i < patterns.length; i++) {
+         boolean matched =
+             Pattern.compile(patterns[i], flags).matcher(texts[i]).matches();
+         if (matched) {
+             System.out.println("<3> Failed at " + i);
+             failCount++;
+         }
+     }
+
+     // Special cases: i, I, u+0131 and u+0130
+     flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
+     Pattern special = Pattern.compile("[h-j]+", flags);
+     if (!special.matcher("\u0131\u0130").matches())
+         failCount++;
+     report("Case Folding");
+ }
+
+ // Exercises group references in replacements: replaceAll() with swapped
+ // groups, and a single appendReplacement()/appendTail() round that swaps
+ // only the first match. Repeated on toSupplementaries() input.
+ private static void appendTest() {
+     Matcher m = Pattern.compile("(ab)(cd)").matcher("abcd");
+     String swapped = m.replaceAll("$2$1");
+     if (!swapped.equals("cdab"))
+         failCount++;
+
+     String swapAll = "Swap all: first = 123, second = 456";
+     String swapOne = "Swap one: first = 123, second = 456";
+     String replacement = "$3$2$1";
+     Pattern assignment = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
+
+     m = assignment.matcher(swapAll);
+     swapped = m.replaceAll(replacement);
+     if (!swapped.equals("Swap all: 123 = first, 456 = second"))
+         failCount++;
+
+     m = assignment.matcher(swapOne);
+     if (m.find()) {
+         // Replace the first match only, then copy the rest unchanged
+         StringBuffer buf = new StringBuffer();
+         m.appendReplacement(buf, replacement);
+         m.appendTail(buf);
+         swapped = buf.toString();
+         if (!swapped.equals("Swap one: 123 = first, second = 456"))
+             failCount++;
+     }
+
+     // Supplementary character test
+     m = Pattern.compile(toSupplementaries("(ab)(cd)"))
+                .matcher(toSupplementaries("abcd"));
+     swapped = m.replaceAll("$2$1");
+     if (!swapped.equals(toSupplementaries("cdab")))
+         failCount++;
+
+     swapAll = toSupplementaries("Swap all: first = 123, second = 456");
+     swapOne = toSupplementaries("Swap one: first = 123, second = 456");
+     replacement = toSupplementaries("$3$2$1");
+     assignment = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
+
+     m = assignment.matcher(swapAll);
+     swapped = m.replaceAll(replacement);
+     if (!swapped.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
+         failCount++;
+
+     m = assignment.matcher(swapOne);
+     if (m.find()) {
+         StringBuffer buf = new StringBuffer();
+         m.appendReplacement(buf, replacement);
+         m.appendTail(buf);
+         swapped = buf.toString();
+         if (!swapped.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
+             failCount++;
+     }
+     report("Append");
+ }
+
+ /**
+  * Checks Pattern.split() on String and CharBuffer input, with a BMP
+  * and a supplementary delimiter, and then String.split() for every
+  * limit from -2 to 2, including the no-match and empty-input cases.
+  * The outcome is reported under the name "Split".
+  */
+ private static void splitTest() {
+     Pattern colon = Pattern.compile(":");
+     String[] parts = colon.split("foo:and:boo", 2);
+     if (!parts[0].equals("foo")) {
+         failCount++;
+     }
+     if (!parts[1].equals("and:boo")) {
+         failCount++;
+     }
+
+     // Same limited split with the supplementary form of the delimiter
+     Pattern supX = Pattern.compile(toSupplementaries("X"));
+     parts = supX.split(toSupplementaries("fooXandXboo"), 2);
+     if (!parts[0].equals(toSupplementaries("foo"))) {
+         failCount++;
+     }
+     if (!parts[1].equals(toSupplementaries("andXboo"))) {
+         failCount++;
+     }
+
+     // Unlimited split of a CharBuffer
+     CharBuffer plainBuf = CharBuffer.allocate(100);
+     plainBuf.put("foo:and:boo");
+     plainBuf.flip();
+     parts = colon.split(plainBuf);
+     if (!parts[0].equals("foo")) {
+         failCount++;
+     }
+     if (!parts[1].equals("and")) {
+         failCount++;
+     }
+     if (!parts[2].equals("boo")) {
+         failCount++;
+     }
+
+     // Unlimited split of a CharBuffer holding supplementary characters
+     CharBuffer supBuf = CharBuffer.allocate(100);
+     supBuf.put(toSupplementaries("fooXandXboo"));
+     supBuf.flip();
+     parts = supX.split(supBuf);
+     if (!parts[0].equals(toSupplementaries("foo"))) {
+         failCount++;
+     }
+     if (!parts[1].equals(toSupplementaries("and"))) {
+         failCount++;
+     }
+     if (!parts[2].equals(toSupplementaries("boo"))) {
+         failCount++;
+     }
+
+     // Split the digit string on each of its digits, for every limit
+     String digits = "0123456789";
+     for (int limit = -2; limit < 3; limit++) {
+         for (int x = 0; x < 10; x++) {
+             String[] pieces = digits.split(Integer.toString(x), limit);
+             int expectedLength = (limit < 1) ? 2 : limit;
+
+             if (limit == 0 && x == 9) {
+                 // the trailing "" is expected to be dropped
+                 if (pieces.length != 1) {
+                     failCount++;
+                 }
+                 if (!pieces[0].equals("012345678")) {
+                     failCount++;
+                 }
+                 continue;
+             }
+
+             if (pieces.length != expectedLength) {
+                 failCount++;
+             }
+             boolean headIsPrefix = pieces[0].equals(digits.substring(0, x));
+             // With limit 1 the only piece is the whole input instead
+             if (!headIsPrefix
+                     && (limit != 1 || !pieces[0].equals(digits.substring(0, 10)))) {
+                 failCount++;
+             }
+             // Check segment 2
+             if (expectedLength > 1 && !pieces[1].equals(digits.substring(x + 1, 10))) {
+                 failCount++;
+             }
+         }
+     }
+
+     // No match found: the input comes back as the only element
+     for (int limit = -2; limit < 3; limit++) {
+         String[] whole = digits.split("e", limit);
+         if (whole.length != 1) {
+             failCount++;
+         }
+         if (!whole[0].equals(digits)) {
+             failCount++;
+         }
+     }
+
+     // limit == 0 with an empty input string
+     String empty = "";
+     String[] fromEmpty = empty.split("e", 0);
+     if (fromEmpty.length != 1) {
+         failCount++;
+     }
+     if (!fromEmpty[0].equals(empty)) {
+         failCount++;
+     }
+
+     report("Split");
+ }
+
+ /**
+  * Checks character classes in which '^' is not the first character:
+  * each class is expected to match the whole run of '@', '[' and '^'
+  * characters. Also checks that an escaped ')' is found as a literal.
+  */
+ private static void negationTest() {
+     String input = "@@@@[[[[^^^^";
+     String[] classes = { "[\\[@^]+", "[@\\[^]+", "[@\\[^@]+" };
+
+     for (int i = 0; i < classes.length; i++) {
+         Matcher m = Pattern.compile(classes[i]).matcher(input);
+         if (!m.find()) {
+             failCount++;
+         }
+         if (!m.group(0).equals(input)) {
+             failCount++;
+         }
+     }
+
+     Matcher paren = Pattern.compile("\\)").matcher("xxx)xxx");
+     if (!paren.find()) {
+         failCount++;
+     }
+
+     report("Negation");
+ }
+
+ /**
+  * Passes character classes containing a single '&' (leading, trailing
+  * and escaped) to check() together with an input made of '@' and '&'.
+  */
+ private static void ampersandTest() {
+     String[] classes = { "[&@]+", "[@&]+", "[@\\&]+" };
+     for (int i = 0; i < classes.length; i++) {
+         check(Pattern.compile(classes[i]), "@@@@&&&&", true);
+     }
+
+     report("Ampersand");
+ }
+
+ /**
+  * Checks that each escape listed below, once compiled, matches the
+  * paired input exactly. Every mismatch increments failCount.
+  */
+ private static void octalTest() throws Exception {
+     // { regex, input that must match it entirely }
+     String[][] cases = {
+         { "\\u0007", "\u0007" },
+         { "\\07",    "\u0007" },
+         { "\\007",   "\u0007" },
+         { "\\0007",  "\u0007" },
+         { "\\040",   "\u0020" },
+         { "\\0403",  "\u00203" },
+         { "\\0103",  "\u0043" },
+     };
+
+     for (int i = 0; i < cases.length; i++) {
+         Matcher m = Pattern.compile(cases[i][0]).matcher(cases[i][1]);
+         if (!m.matches()) {
+             failCount++;
+         }
+     }
+
+     report("Octal");
+ }
+
+ /**
+  * Compiles literal patterns of 32, 33, 34 and 100 characters, in BMP
+  * and supplementary form. A PatternSyntaxException in either group
+  * counts as one failure.
+  */
+ private static void longPatternTest() throws Exception {
+     String[] literals = {
+         "a 32-character-long pattern xxxx",
+         "a 33-character-long pattern xxxxx",
+         "a thirty four character long regex",
+     };
+
+     try {
+         for (int k = 0; k < literals.length; k++) {
+             Pattern.compile(literals[k]);
+         }
+         // 100 characters cycling through 'a'..'z'
+         StringBuffer longRegex = new StringBuffer(101);
+         for (int i = 0; i < 100; i++) {
+             longRegex.append((char)(97 + i%26));
+         }
+         Pattern.compile(longRegex.toString());
+     } catch (PatternSyntaxException e) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     try {
+         for (int k = 0; k < literals.length; k++) {
+             Pattern.compile(toSupplementaries(literals[k]));
+         }
+         StringBuffer longRegex = new StringBuffer(101*2);
+         for (int i = 0; i < 100; i++) {
+             int codePoint = Character.MIN_SUPPLEMENTARY_CODE_POINT + 97 + i%26;
+             longRegex.append(Character.toChars(codePoint));
+         }
+         Pattern.compile(longRegex.toString());
+     } catch (PatternSyntaxException e) {
+         failCount++;
+     }
+     report("LongPattern");
+ }
+
+ /**
+  * Checks that group(0) equals the whole input after lookingAt() and
+  * matches(), on a reset matcher and on a fresh one, for BMP and
+  * supplementary input. The first matcher of each half is also handed
+  * to check().
+  */
+ private static void group0Test() throws Exception {
+     Pattern pat = Pattern.compile("(tes)ting");
+     Matcher mat = pat.matcher("testing");
+     check(mat, "testing");
+
+     // A failed operation and a wrong group(0) each count as one failure
+     mat.reset("testing");
+     if (!(mat.lookingAt() && mat.group(0).equals("testing"))) {
+         failCount++;
+     }
+
+     mat.reset("testing");
+     if (!(mat.matches() && mat.group(0).equals("testing"))) {
+         failCount++;
+     }
+
+     // Fresh pattern and matcher
+     pat = Pattern.compile("(tes)ting");
+     mat = pat.matcher("testing");
+     if (!(mat.lookingAt() && mat.group(0).equals("testing"))) {
+         failCount++;
+     }
+
+     // Anchored variant
+     pat = Pattern.compile("^(tes)ting");
+     mat = pat.matcher("testing");
+     if (!(mat.matches() && mat.group(0).equals("testing"))) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     pat = Pattern.compile(toSupplementaries("(tes)ting"));
+     mat = pat.matcher(toSupplementaries("testing"));
+     check(mat, toSupplementaries("testing"));
+
+     mat.reset(toSupplementaries("testing"));
+     if (!(mat.lookingAt() && mat.group(0).equals(toSupplementaries("testing")))) {
+         failCount++;
+     }
+
+     mat.reset(toSupplementaries("testing"));
+     if (!(mat.matches() && mat.group(0).equals(toSupplementaries("testing")))) {
+         failCount++;
+     }
+
+     pat = Pattern.compile(toSupplementaries("(tes)ting"));
+     mat = pat.matcher(toSupplementaries("testing"));
+     if (!(mat.lookingAt() && mat.group(0).equals(toSupplementaries("testing")))) {
+         failCount++;
+     }
+
+     pat = Pattern.compile(toSupplementaries("^(tes)ting"));
+     mat = pat.matcher(toSupplementaries("testing"));
+     if (!(mat.matches() && mat.group(0).equals(toSupplementaries("testing")))) {
+         failCount++;
+     }
+
+     report("Group0");
+ }
+
+ /**
+  * Checks Matcher.find(int): a search starting inside the input, a
+  * search starting at the end of the input, and the
+  * IndexOutOfBoundsException expected for a start index past the end.
+  */
+ private static void findIntTest() throws Exception {
+     Matcher blah = Pattern.compile("blah").matcher("zzzzblahzzzzzblah");
+     if (!blah.find(2)) {
+         failCount++;
+     }
+
+     // "$" must still be found when the search starts at index 10
+     Matcher atEnd = Pattern.compile("$").matcher("1234567890");
+     if (!atEnd.find(10)) {
+         failCount++;
+     }
+     try {
+         atEnd.find(11);
+         failCount++;
+     } catch (IndexOutOfBoundsException e) {
+         // correct result
+     }
+
+     // Supplementary character test
+     Matcher supBlah = Pattern.compile(toSupplementaries("blah"))
+                              .matcher(toSupplementaries("zzzzblahzzzzzblah"));
+     if (!supBlah.find(2)) {
+         failCount++;
+     }
+
+     report("FindInt");
+ }
+
+ /**
+  * Checks the empty pattern: it is found at index 0 of a non-empty
+  * input, does not match a non-empty input as a whole, and matches the
+  * empty input, both through a Matcher and through Pattern.matches().
+  */
+ private static void emptyPatternTest() throws Exception {
+     Matcher m = Pattern.compile("").matcher("foo");
+
+     // Should find empty pattern at beginning of input
+     if (!m.find()) {
+         failCount++;
+     }
+     if (m.start() != 0) {
+         failCount++;
+     }
+
+     // Should not match entire input if input is not empty
+     m.reset();
+     if (m.matches()) {
+         failCount++;
+     }
+
+     // After the failed match, start(0) is expected to throw
+     try {
+         m.start(0);
+         failCount++;
+     } catch (IllegalStateException e) {
+         // Correct result
+     }
+
+     // Should match entire input if input is empty
+     m.reset("");
+     if (!m.matches()) {
+         failCount++;
+     }
+
+     if (!Pattern.matches("", "")) {
+         failCount++;
+     }
+
+     if (Pattern.matches("", "foo")) {
+         failCount++;
+     }
+     report("EmptyPattern");
+ }
+
+ private static void charClassTest() throws Exception {
+ Pattern pattern = Pattern.compile("blah[ab]]blech");
+ check(pattern, "blahb]blech", true);
+
+ pattern = Pattern.compile("[abc[def]]");
+ check(pattern, "b", true);
+
+ // Supplementary character tests
+ pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
+ check(pattern, toSupplementaries("blahb]blech"), true);
+
+ pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
+ check(pattern, toSupplementaries("b"), true);
+
+ try {
+ // u00ff when UNICODE_CASE
+ pattern = Pattern.compile("[ab\u00ffcd]",
+ Pattern.CASE_INSENSITIVE|
+ Pattern.UNICODE_CASE);
+ check(pattern, "ab\u00ffcd", true);
+ check(pattern, "Ab\u0178Cd", true);
+
+ // u00b5 when UNICODE_CASE
+ pattern = Pattern.compile("[ab\u00b5cd]",
+ Pattern.CASE_INSENSITIVE|
+ Pattern.UNICODE_CASE);
+ check(pattern, "ab\u00b5cd", true);
+ check(pattern, "Ab\u039cCd", true);
+ } catch (Exception e) { failCount++; }
+
+ /* Special cases
+ (1)LatinSmallLetterLongS u+017f
+ (2)LatinSmallLetterDotlessI u+0131
+ (3)LatineCapitalLetterIWithDotAbove u+0130
+ (4)KelvinSign u+212a
+ (5)AngstromSign u+212b
+ */
+ int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
+ pattern = Pattern.compile("[sik\u00c5]+", flags);
+ if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
+ failCount++;
+
+ report("CharClass");
+ }
+
+ /**
+  * Compares unanchored patterns with their '^' and \A anchored
+  * counterparts, with and without MULTILINE, and checks where
+  * replaceAll() inserts text for a bare '^'.
+  */
+ private static void caretTest() throws Exception {
+     String hashed = "a#bc#def##g";
+
+     // Unanchored: a sequence of word runs and empty matches
+     Matcher matcher = Pattern.compile("\\w*").matcher(hashed);
+     String[] expectedRuns = { "a", "", "bc", "", "def", "", "", "g", "" };
+     for (int i = 0; i < expectedRuns.length; i++) {
+         check(matcher, expectedRuns[i]);
+     }
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     // Anchored: only the first run
+     matcher = Pattern.compile("^\\w*").matcher(hashed);
+     check(matcher, "a");
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     matcher = Pattern.compile("\\w").matcher("abc##x");
+     String[] expectedChars = { "a", "b", "c", "x" };
+     for (int i = 0; i < expectedChars.length; i++) {
+         check(matcher, expectedChars[i]);
+     }
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     matcher = Pattern.compile("^\\w").matcher("abc##x");
+     check(matcher, "a");
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     String twoLines = "abcdef-ghi\njklmno";
+
+     // \A: only one match is expected in the two-line input
+     matcher = Pattern.compile("\\A\\p{Alpha}{3}").matcher(twoLines);
+     check(matcher, "abc");
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     // '^' with MULTILINE: one match per line is expected
+     matcher = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE).matcher(twoLines);
+     check(matcher, "abc");
+     check(matcher, "jkl");
+     if (matcher.find()) {
+         failCount++;
+     }
+
+     // A bare '^' replaced by "X", with and without MULTILINE
+     matcher = Pattern.compile("^", Pattern.MULTILINE).matcher("this is some text");
+     if (!matcher.replaceAll("X").equals("Xthis is some text")) {
+         failCount++;
+     }
+
+     matcher = Pattern.compile("^").matcher("this is some text");
+     if (!matcher.replaceAll("X").equals("Xthis is some text")) {
+         failCount++;
+     }
+
+     // No "X" is expected after the trailing line terminator
+     matcher = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES)
+                      .matcher("this is some text\n");
+     if (!matcher.replaceAll("X").equals("Xthis is some text\n")) {
+         failCount++;
+     }
+
+     report("Caret");
+ }
+
+ /**
+  * Checks that independent (?>...) and pure (?:...) groups do not
+  * capture: asking for group 1 after a find() must throw
+  * IndexOutOfBoundsException. Runs on BMP input first, then on
+  * supplementary input.
+  */
+ private static void groupCaptureTest() throws Exception {
+     // Independent group first, pure group second
+     String[] regexes = { "x+(?>y+)z+", "x+(?:y+)z+" };
+     String input = "xxxyyyzzz";
+
+     for (int pass = 0; pass < 2; pass++) {
+         boolean supplementary = (pass == 1);
+         for (int i = 0; i < regexes.length; i++) {
+             Pattern pattern = Pattern.compile(
+                 supplementary ? toSupplementaries(regexes[i]) : regexes[i]);
+             Matcher matcher = pattern.matcher(
+                 supplementary ? toSupplementaries(input) : input);
+             matcher.find();
+             try {
+                 matcher.group(1);
+                 failCount++;
+             } catch (IndexOutOfBoundsException ioobe) {
+                 // Good result
+             }
+         }
+     }
+
+     report("GroupCapture");
+ }
+
+ /**
+  * Back-reference checks: references to existing and missing groups,
+  * single-digit references that must always compile, and the way \11 is
+  * read depending on how many groups the pattern has.
+  */
+ private static void backRefTest() throws Exception {
+     Pattern pat = Pattern.compile("(a*)bc\\1");
+     check(pat, "zzzaabcazzz", true);
+
+     pat = Pattern.compile("(a*)bc\\1");
+     check(pat, "zzzaabcaazzz", true);
+
+     pat = Pattern.compile("(abc)(def)\\1");
+     check(pat, "abcdefabc", true);
+
+     pat = Pattern.compile("(abc)(def)\\3");
+     check(pat, "abcdefabc", false);
+
+     try {
+         for (int group = 1; group < 10; group++) {
+             // Back-references 1-9 must always be accepted by the compiler
+             pat = Pattern.compile("abcdef\\" + group);
+             // and must fail to match if the target group does not exist
+             check(pat, "abcdef", false);
+         }
+     } catch (PatternSyntaxException e) {
+         failCount++;
+     }
+
+     // Ten groups: the expected results treat \11 as group 1 plus a literal '1'
+     pat = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
+     check(pat, "abcdefghija", false);
+     check(pat, "abcdefghija1", true);
+
+     // Eleven groups: the expected result treats \11 as group 11
+     pat = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
+     check(pat, "abcdefghijkk", true);
+
+     pat = Pattern.compile("(a)bcdefghij\\11");
+     check(pat, "abcdefghija1", true);
+
+     // Supplementary character tests
+     pat = Pattern.compile(toSupplementaries("(a*)bc\\1"));
+     check(pat, toSupplementaries("zzzaabcazzz"), true);
+
+     pat = Pattern.compile(toSupplementaries("(a*)bc\\1"));
+     check(pat, toSupplementaries("zzzaabcaazzz"), true);
+
+     pat = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
+     check(pat, toSupplementaries("abcdefabc"), true);
+
+     pat = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
+     check(pat, toSupplementaries("abcdefabc"), false);
+
+     pat = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
+     check(pat, toSupplementaries("abcdefghija"), false);
+     check(pat, toSupplementaries("abcdefghija1"), true);
+
+     pat = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
+     check(pat, toSupplementaries("abcdefghijkk"), true);
+
+     report("BackRef");
+ }
+
+ /**
+  * Unicode Technical Report #18, section 2.6 End of Line
+  * There is no empty line to be matched in the sequence \u000D\u000A
+  * but there is an empty line in the sequence \u000A\u000D.
+  * The same checks are run on BMP input and on supplementary input.
+  */
+ private static void anchorTest() throws Exception {
+     Pattern linePat = Pattern.compile("^.*$", Pattern.MULTILINE);
+     Matcher lines = linePat.matcher("blah1\r\nblah2");
+     // CR LF: the second match is already the second line
+     for (int k = 0; k < 2; k++) {
+         lines.find();
+     }
+     if (!lines.group().equals("blah2")) {
+         failCount++;
+     }
+
+     // LF CR: an empty line sits in between, so it takes a third find()
+     lines.reset("blah1\n\rblah2");
+     for (int k = 0; k < 3; k++) {
+         lines.find();
+     }
+     if (!lines.group().equals("blah2")) {
+         failCount++;
+     }
+
+     // Test behavior of $ with \r\n at end of input
+     Pattern tailPat = Pattern.compile(".+$");
+     Matcher tail = tailPat.matcher("blah1\r\n");
+     if (!tail.find()) {
+         failCount++;
+     }
+     if (!tail.group().equals("blah1")) {
+         failCount++;
+     }
+     if (tail.find()) {
+         failCount++;
+     }
+
+     // Test behavior of $ with \r\n at end of input in multiline
+     tailPat = Pattern.compile(".+$", Pattern.MULTILINE);
+     tail = tailPat.matcher("blah1\r\n");
+     if (!tail.find()) {
+         failCount++;
+     }
+     if (tail.find()) {
+         failCount++;
+     }
+
+     // Test for $ recognition of \u0085 for bug 4527731
+     tailPat = Pattern.compile(".+$", Pattern.MULTILINE);
+     tail = tailPat.matcher("blah1\u0085");
+     if (!tail.find()) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     linePat = Pattern.compile("^.*$", Pattern.MULTILINE);
+     lines = linePat.matcher(toSupplementaries("blah1\r\nblah2"));
+     for (int k = 0; k < 2; k++) {
+         lines.find();
+     }
+     if (!lines.group().equals(toSupplementaries("blah2"))) {
+         failCount++;
+     }
+
+     lines.reset(toSupplementaries("blah1\n\rblah2"));
+     for (int k = 0; k < 3; k++) {
+         lines.find();
+     }
+     if (!lines.group().equals(toSupplementaries("blah2"))) {
+         failCount++;
+     }
+
+     // Test behavior of $ with \r\n at end of input
+     tailPat = Pattern.compile(".+$");
+     tail = tailPat.matcher(toSupplementaries("blah1\r\n"));
+     if (!tail.find()) {
+         failCount++;
+     }
+     if (!tail.group().equals(toSupplementaries("blah1"))) {
+         failCount++;
+     }
+     if (tail.find()) {
+         failCount++;
+     }
+
+     // Test behavior of $ with \r\n at end of input in multiline
+     tailPat = Pattern.compile(".+$", Pattern.MULTILINE);
+     tail = tailPat.matcher(toSupplementaries("blah1\r\n"));
+     if (!tail.find()) {
+         failCount++;
+     }
+     if (tail.find()) {
+         failCount++;
+     }
+
+     // Test for $ recognition of \u0085 for bug 4527731
+     tailPat = Pattern.compile(".+$", Pattern.MULTILINE);
+     tail = tailPat.matcher(toSupplementaries("blah1\u0085"));
+     if (!tail.find()) {
+         failCount++;
+     }
+
+     report("Anchors");
+ }
+
+ /**
+  * A basic sanity test of Matcher.lookingAt(): it must succeed when the
+  * input starts with a match and fail when the first match starts
+  * further in. Runs on BMP input and on supplementary input.
+  */
+ private static void lookingAtTest() throws Exception {
+     Pattern prefixPat = Pattern.compile("(ab)(c*)");
+     Matcher prefix = prefixPat.matcher("abccczzzabcczzzabccc");
+
+     if (!prefix.lookingAt()) {
+         failCount++;
+     }
+
+     // group() and group(0) must agree
+     if (!prefix.group().equals(prefix.group(0))) {
+         failCount++;
+     }
+
+     prefix = prefixPat.matcher("zzzabccczzzabcczzzabccczzz");
+     if (prefix.lookingAt()) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     prefixPat = Pattern.compile(toSupplementaries("(ab)(c*)"));
+     prefix = prefixPat.matcher(toSupplementaries("abccczzzabcczzzabccc"));
+
+     if (!prefix.lookingAt()) {
+         failCount++;
+     }
+
+     if (!prefix.group().equals(prefix.group(0))) {
+         failCount++;
+     }
+
+     prefix = prefixPat.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+     if (prefix.lookingAt()) {
+         failCount++;
+     }
+
+     report("Looking At");
+ }
+
+ /**
+  * A basic sanity test of Matcher.matches(): the whole input has to
+  * match, so inputs with extra leading or trailing text must be
+  * rejected. Runs on BMP input and on supplementary input.
+  */
+ private static void matchesTest() throws Exception {
+     // matches()
+     Pattern whole = Pattern.compile("ulb(c*)");
+     Matcher full = whole.matcher("ulbcccccc");
+     if (!full.matches()) {
+         failCount++;
+     }
+
+     // find() but not matches()
+     full.reset("zzzulbcccccc");
+     if (full.matches()) {
+         failCount++;
+     }
+
+     // lookingAt() but not matches()
+     full.reset("ulbccccccdef");
+     if (full.matches()) {
+         failCount++;
+     }
+
+     // matches(): the second alternative is needed to cover the input
+     whole = Pattern.compile("a|ad");
+     full = whole.matcher("ad");
+     if (!full.matches()) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     // matches()
+     whole = Pattern.compile(toSupplementaries("ulb(c*)"));
+     full = whole.matcher(toSupplementaries("ulbcccccc"));
+     if (!full.matches()) {
+         failCount++;
+     }
+
+     // find() but not matches()
+     full.reset(toSupplementaries("zzzulbcccccc"));
+     if (full.matches()) {
+         failCount++;
+     }
+
+     // lookingAt() but not matches()
+     full.reset(toSupplementaries("ulbccccccdef"));
+     if (full.matches()) {
+         failCount++;
+     }
+
+     // matches()
+     whole = Pattern.compile(toSupplementaries("a|ad"));
+     full = whole.matcher(toSupplementaries("ad"));
+     if (!full.matches()) {
+         failCount++;
+     }
+
+     report("Matches");
+ }
+
+ /**
+  * A basic sanity test of Pattern.matches().
+  *
+  * The first group of checks runs on plain (BMP) strings and the second
+  * group repeats them on strings passed through toSupplementaries().
+  * Each unexpected result increments failCount; the outcome is reported
+  * under the name "Pattern Matches".
+  */
+ private static void patternMatchesTest() throws Exception {
+     // matches()
+     if (!Pattern.matches("ulb(c*)", "ulbcccccc"))
+         failCount++;
+
+     // find() but not matches()
+     if (Pattern.matches("ulb(c*)", "zzzulbcccccc"))
+         failCount++;
+
+     // lookingAt() but not matches()
+     if (Pattern.matches("ulb(c*)", "ulbccccccdef"))
+         failCount++;
+
+     // Supplementary character test
+     // matches()
+     if (!Pattern.matches(toSupplementaries("ulb(c*)"),
+                          toSupplementaries("ulbcccccc")))
+         failCount++;
+
+     // find() but not matches()
+     if (Pattern.matches(toSupplementaries("ulb(c*)"),
+                         toSupplementaries("zzzulbcccccc")))
+         failCount++;
+
+     // lookingAt() but not matches()
+     if (Pattern.matches(toSupplementaries("ulb(c*)"),
+                         toSupplementaries("ulbccccccdef")))
+         failCount++;
+
+     report("Pattern Matches");
+ }
+
+ /**
+  * Canonical equivalence testing. With Pattern.CANON_EQ the engine is
+  * expected to match sequences that are not spelled out in the pattern
+  * but are considered equivalent by the Unicode Standard: composed
+  * versus decomposed characters, and combining marks whose order may
+  * legally change.
+  */
+ private static void ceTest() throws Exception {
+     // Decomposed char outside char classes
+     Pattern canon = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
+     Matcher matcher = canon.matcher("test\u00e5");
+     if (!matcher.matches()) {
+         failCount++;
+     }
+
+     matcher.reset("testa\u030a");
+     if (!matcher.matches()) {
+         failCount++;
+     }
+
+     // Composed char outside char classes
+     canon = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
+     matcher = canon.matcher("test\u00e5");
+     if (!matcher.matches()) {
+         failCount++;
+     }
+
+     matcher.reset("testa\u030a");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     // Decomposed char inside a char class
+     canon = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
+     matcher = canon.matcher("test\u00e5");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     matcher.reset("testa\u030a");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     // Composed char inside a char class
+     canon = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
+     matcher = canon.matcher("test\u00e5");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     matcher.reset("testa\u0300");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     matcher.reset("testa\u030a");
+     if (!matcher.find()) {
+         failCount++;
+     }
+
+     // Marks that cannot legally change order and be equivalent
+     canon = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
+     check(canon, "testa\u0308\u0300", true);
+     check(canon, "testa\u0300\u0308", false);
+
+     // Marks that can legally change order and be equivalent
+     canon = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
+     check(canon, "testa\u0308\u0323", true);
+     check(canon, "testa\u0323\u0308", true);
+
+     // Every equivalent spelling of the three-mark sequence must match
+     canon = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
+     String[] equivalents = {
+         "testa\u0308\u0323\u0300",
+         "testa\u0323\u0308\u0300",
+         "testa\u0308\u0300\u0323",
+         "test\u00e4\u0323\u0300",
+         "test\u00e4\u0300\u0323",
+     };
+     for (int i = 0; i < equivalents.length; i++) {
+         check(canon, equivalents[i], true);
+     }
+
+     /*
+      * The following canonical equivalence tests don't work. Bug id: 4916384.
+      *
+     // Decomposed hangul (jamos)
+     p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
+     m = p.matcher("\u1100\u1161");
+     if (!m.matches())
+     failCount++;
+
+     m.reset("\uac00");
+     if (!m.matches())
+     failCount++;
+
+     // Composed hangul
+     p = Pattern.compile("\uac00", Pattern.CANON_EQ);
+     m = p.matcher("\u1100\u1161");
+     if (!m.matches())
+     failCount++;
+
+     m.reset("\uac00");
+     if (!m.matches())
+     failCount++;
+
+     // Decomposed supplementary outside char classes
+     p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
+     m = p.matcher("test\ud834\uddc0");
+     if (!m.matches())
+     failCount++;
+
+     m.reset("test\ud834\uddbc\ud834\udd6f");
+     if (!m.matches())
+     failCount++;
+
+     // Composed supplementary outside char classes
+     p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
+     m.reset("test\ud834\uddbc\ud834\udd6f");
+     if (!m.matches())
+     failCount++;
+
+     m = p.matcher("test\ud834\uddc0");
+     if (!m.matches())
+     failCount++;
+
+     */
+
+     report("Canonical Equivalence");
+ }
+
+ /**
+  * A basic sanity test of Matcher.replaceAll(), using a literal
+  * replacement and a group reference, on BMP input and on
+  * supplementary input.
+  */
+ private static void globalSubstitute() throws Exception {
+     // Global substitution with a literal
+     Pattern abc = Pattern.compile("(ab)(c*)");
+     Matcher sub = abc.matcher("abccczzzabcczzzabccc");
+     if (!sub.replaceAll("test").equals("testzzztestzzztest")) {
+         failCount++;
+     }
+
+     sub.reset("zzzabccczzzabcczzzabccczzz");
+     if (!sub.replaceAll("test").equals("zzztestzzztestzzztestzzz")) {
+         failCount++;
+     }
+
+     // Global substitution with groups
+     sub.reset("zzzabccczzzabcczzzabccczzz");
+     String replaced = sub.replaceAll("$1");
+     if (!replaced.equals("zzzabzzzabzzzabzzz")) {
+         failCount++;
+     }
+
+     // Supplementary character test
+     // Global substitution with a literal
+     abc = Pattern.compile(toSupplementaries("(ab)(c*)"));
+     sub = abc.matcher(toSupplementaries("abccczzzabcczzzabccc"));
+     replaced = sub.replaceAll(toSupplementaries("test"));
+     if (!replaced.equals(toSupplementaries("testzzztestzzztest"))) {
+         failCount++;
+     }
+
+     sub.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+     replaced = sub.replaceAll(toSupplementaries("test"));
+     if (!replaced.equals(toSupplementaries("zzztestzzztestzzztestzzz"))) {
+         failCount++;
+     }
+
+     // Global substitution with groups
+     sub.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
+     replaced = sub.replaceAll("$1");
+     if (!replaced.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) {
+         failCount++;
+     }
+
+     report("Global Substitution");
+ }
+
+ /**
+ * Tests the usage of Matcher.appendReplacement() with literal
+ * and group substitutions.
+ *
+ * Most scenarios follow the same shape: appendReplacement() is first
+ * called before any find() and is expected to throw
+ * IllegalStateException; after find() the buffer content is compared
+ * once after appendReplacement() and once more after appendTail().
+ * The scenarios are run on plain input first and are then repeated on
+ * input passed through toSupplementaries(). The last scenario checks
+ * that a malformed replacement string leaves the output buffer empty.
+ * Every unexpected result increments failCount; the outcome is
+ * reported under the name "SB Substitution".
+ */
+ private static void stringbufferSubstitute() throws Exception {
+ // SB substitution with literal
+ String blah = "zzzblahzzz";
+ Pattern p = Pattern.compile("blah");
+ Matcher m = p.matcher(blah);
+ StringBuffer result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "blech");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "blech");
+ if (!result.toString().equals("zzzblech"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzblechzzz"))
+ failCount++;
+
+ // SB substitution with groups
+ blah = "zzzabcdzzz";
+ p = Pattern.compile("(ab)(cd)*");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals("zzzab"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabzzz"))
+ failCount++;
+
+ // SB substitution with 3 groups
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "$1w$2w$3");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "$1w$2w$3");
+ if (!result.toString().equals("zzzabwcdwef"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabwcdwefzzz"))
+ failCount++;
+
+ // SB substitution with groups and three matches
+ // skipping middle match
+ blah = "zzzabcdzzzabcddzzzabcdzzz";
+ p = Pattern.compile("(ab)(cd*)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals("zzzab"))
+ failCount++;
+
+ m.find(); // second match: deliberately not replaced
+ m.find(); // third match: replaced below
+ m.appendReplacement(result, "$2");
+ if (!result.toString().equals("zzzabzzzabcddzzzcd"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
+ failCount++;
+
+ // Check to make sure escaped $ is ignored, i.e. copied out as a literal "$2"
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, "$1w\\$2w$3");
+ if (!result.toString().equals("zzzabw$2wef"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabw$2wefzzz"))
+ failCount++;
+
+ // Check to make sure a reference to nonexistent group causes error
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ try {
+ m.appendReplacement(result, "$1w$5w$3");
+ failCount++;
+ } catch (IndexOutOfBoundsException ioobe) {
+ // Correct result
+ }
+
+ // Check double digit group references: $11 must resolve to group 11
+ blah = "zzz123456789101112zzz";
+ p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, "$1w$11w$3");
+ if (!result.toString().equals("zzz1w11w3"))
+ failCount++;
+
+ // Check to make sure it backs off $15 to $1 if only three groups
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, "$1w$15w$3");
+ if (!result.toString().equals("zzzabwab5wef"))
+ failCount++;
+
+
+ // Supplementary character test
+ // SB substitution with literal
+ blah = toSupplementaries("zzzblahzzz");
+ p = Pattern.compile(toSupplementaries("blah"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, toSupplementaries("blech"));
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, toSupplementaries("blech"));
+ if (!result.toString().equals(toSupplementaries("zzzblech")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
+ failCount++;
+
+ // SB substitution with groups
+ blah = toSupplementaries("zzzabcdzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals(toSupplementaries("zzzab")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabzzz")))
+ failCount++;
+
+ // SB substitution with 3 groups
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
+ failCount++;
+
+ // SB substitution with groups and three matches
+ // skipping middle match
+ blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) { // expected: find() has not been called yet
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals(toSupplementaries("zzzab")))
+ failCount++;
+
+ m.find(); // second match: deliberately not replaced
+ m.find(); // third match: replaced below
+ m.appendReplacement(result, "$2");
+ if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
+ failCount++;
+
+ // Check to make sure escaped $ is ignored, i.e. copied out as a literal "$2"
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
+ failCount++;
+
+ // Check to make sure a reference to nonexistent group causes error
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ try {
+ m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
+ failCount++;
+ } catch (IndexOutOfBoundsException ioobe) {
+ // Correct result
+ }
+
+ // Check double digit group references: $11 must resolve to group 11
+ blah = toSupplementaries("zzz123456789101112zzz");
+ p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
+ if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
+ failCount++;
+
+ // Check to make sure it backs off $15 to $1 if only three groups
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuffer();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
+ failCount++;
+
+ // Check nothing has been appended into the output buffer if
+ // the replacement string triggers IllegalArgumentException.
+ p = Pattern.compile("(abc)");
+ m = p.matcher("abcd");
+ result = new StringBuffer();
+ m.find();
+ try {
+ m.appendReplacement(result, ("xyz$g"));
+ failCount++;
+ } catch (IllegalArgumentException iae) {
+ if (result.length() != 0)
+ failCount++;
+ }
+
+ report("SB Substitution");
+ }
+
+ /*
+ * Randomized check of replaceAll with numbered group references.
+ * On each of 1000 runs a base string is created from random lead chars,
+ * 5 random groups of characters (the substitution string), and random
+ * trailing chars. A pattern capturing the 5 groups is searched for and
+ * replaced with: random group + random string + random group.
+ * The result is compared with an expected string built independently.
+ */
+ private static void substitutionBasher() {
+ for (int runs = 0; runs<1000; runs++) {
+ // Create a base string to work in, starting with the random lead
+ int leadingChars = generator.nextInt(10);
+ StringBuffer baseBuffer = new StringBuffer(100);
+ String leadingString = getRandomAlphaString(leadingChars);
+ baseBuffer.append(leadingString);
+
+ // Create 5 groups of random number of random chars
+ // bufferToSub collects the string to substitute (groups concatenated)
+ // bufferToPat collects the pattern (each group wrapped in parens)
+ StringBuffer bufferToSub = new StringBuffer(25);
+ StringBuffer bufferToPat = new StringBuffer(50);
+ String[] groups = new String[5];
+ for(int i=0; i<5; i++) {
+ int aGroupSize = generator.nextInt(5)+1;
+ groups[i] = getRandomAlphaString(aGroupSize);
+ bufferToSub.append(groups[i]);
+ bufferToPat.append('(');
+ bufferToPat.append(groups[i]);
+ bufferToPat.append(')');
+ }
+ String stringToSub = bufferToSub.toString();
+ String pattern = bufferToPat.toString();
+
+ // Append the sub string to the working string, right after the lead
+ baseBuffer.append(stringToSub);
+
+ // Append random chars to end
+ int trailingChars = generator.nextInt(10);
+ String trailingString = getRandomAlphaString(trailingChars);
+ baseBuffer.append(trailingString);
+ String baseString = baseBuffer.toString();
+
+ // Create test pattern and matcher
+ Pattern p = Pattern.compile(pattern);
+ Matcher m = p.matcher(baseString);
+
+ // Reject candidate (skip this run) if the match starts inside the lead
+ m.find();
+ if (m.start() < leadingChars)
+ continue;
+
+ // Reject candidate if more than one match
+ if (m.find())
+ continue;
+
+ // Construct a replacement string with :
+ // random group + random string + random group
+ StringBuffer bufferToRep = new StringBuffer();
+ int groupIndex1 = generator.nextInt(5);
+ bufferToRep.append("$" + (groupIndex1 + 1));
+ String randomMidString = getRandomAlphaString(5);
+ bufferToRep.append(randomMidString);
+ int groupIndex2 = generator.nextInt(5);
+ bufferToRep.append("$" + (groupIndex2 + 1));
+ String replacement = bufferToRep.toString();
+
+ // Do the replacement (replaceAll resets the matcher, so the finds above do not matter)
+ String result = m.replaceAll(replacement);
+
+ // Construct expected result
+ StringBuffer bufferToRes = new StringBuffer();
+ bufferToRes.append(leadingString);
+ bufferToRes.append(groups[groupIndex1]);
+ bufferToRes.append(randomMidString);
+ bufferToRes.append(groups[groupIndex2]);
+ bufferToRes.append(trailingString);
+ String expectedResult = bufferToRes.toString();
+
+ // Check results
+ if (!result.equals(expectedResult))
+ failCount++;
+ }
+
+ report("Substitution Basher");
+ }
+
+ /**
+ * Checks the handling of some escape sequences (the octal, hex and
+ * Unicode forms of '#') that the Pattern class should process instead
+ * of the java compiler. These are not in the file because the escapes
+ * should be processed by the Pattern class when the regex is compiled.
+ */
+ private static void escapes() throws Exception {
+ Pattern p = Pattern.compile("\\043");
+ Matcher m = p.matcher("#");
+ if (!m.find())
+ failCount++;
+
+ p = Pattern.compile("\\x23");
+ m = p.matcher("#");
+ if (!m.find())
+ failCount++;
+
+ p = Pattern.compile("\\u0023");
+ m = p.matcher("#");
+ if (!m.find())
+ failCount++;
+
+ report("Escape sequences");
+ }
+
+ /**
+ * Checks find() on empty input, with and without CASE_INSENSITIVE: "abc" must not
+ * match, "a*" must match. These tests are incompatible with the test file format.
+ */
+ private static void blankInput() throws Exception {
+ Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
+ Matcher m = p.matcher("");
+ if (m.find())
+ failCount++;
+
+ p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
+ m = p.matcher("");
+ if (!m.find())
+ failCount++;
+
+ p = Pattern.compile("abc");
+ m = p.matcher("");
+ if (m.find())
+ failCount++;
+
+ p = Pattern.compile("a*");
+ m = p.matcher("");
+ if (!m.find())
+ failCount++;
+
+ report("Blank input");
+ }
+
+ /**
+ * Tests the Boyer-Moore pattern matching of a character sequence on randomly
+ * generated patterns: base 'a', then base MIN_SUPPLEMENTARY_CODE_POINT - 10.
+ */
+ private static void bm() throws Exception {
+ doBnM('a');
+ report("Boyer Moore (ASCII)");
+
+ doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
+ report("Boyer Moore (Supplementary)");
+ }
+
+ private static void doBnM(int baseCharacter) throws Exception {
+ int achar=0;
+
+ for (int i=0; i<100; i++) {
+ // Create a short pattern to search for
+ int patternLength = generator.nextInt(7) + 4;
+ StringBuffer patternBuffer = new StringBuffer(patternLength);
+ for (int x=0; xy+)z+"),
+ "xxxyyyzzz",
+ "gname",
+ "yyy");
+
+ //backref
+ Pattern pattern = Pattern.compile("(a*)bc\\1");
+ check(pattern, "zzzaabcazzz", true); // found "abca"
+
+ check(Pattern.compile("(?a*)bc\\k"),
+ "zzzaabcaazzz", true);
+
+ check(Pattern.compile("(?abc)(def)\\k"),
+ "abcdefabc", true);
+
+ check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?k)\\k"),
+ "abcdefghijkk", true);
+
+ // Supplementary character tests
+ check(Pattern.compile("(?" + toSupplementaries("a*)bc") + "\\k"),
+ toSupplementaries("zzzaabcazzz"), true);
+
+ check(Pattern.compile("(?" + toSupplementaries("a*)bc") + "\\k"),
+ toSupplementaries("zzzaabcaazzz"), true);
+
+ check(Pattern.compile("(?" + toSupplementaries("abc)(def)") + "\\k"),
+ toSupplementaries("abcdefabc"), true);
+
+ check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
+ "(?" +
+ toSupplementaries("k)") + "\\k"),
+ toSupplementaries("abcdefghijkk"), true);
+
+ check(Pattern.compile("x+(?y+)z+\\k"),
+ "xxxyyyzzzyyy",
+ "gname",
+ "yyy");
+
+ //replaceFirst/All
+ checkReplaceFirst("(?ab)(c*)",
+ "abccczzzabcczzzabccc",
+ "$",
+ "abzzzabcczzzabccc");
+
+ checkReplaceAll("(?ab)(c*)",
+ "abccczzzabcczzzabccc",
+ "$",
+ "abzzzabzzzab");
+
+
+ checkReplaceFirst("(?ab)(c*)",
+ "zzzabccczzzabcczzzabccczzz",
+ "$",
+ "zzzabzzzabcczzzabccczzz");
+
+ checkReplaceAll("(?ab)(c*)",
+ "zzzabccczzzabcczzzabccczzz",
+ "$",
+ "zzzabzzzabzzzabzzz");
+
+ checkReplaceFirst("(?ab)(?c*)",
+ "zzzabccczzzabcczzzabccczzz",
+ "$",
+ "zzzccczzzabcczzzabccczzz");
+
+ checkReplaceAll("(?ab)(?c*)",
+ "zzzabccczzzabcczzzabccczzz",
+ "$",
+ "zzzccczzzcczzzccczzz");
+
+ //toSupplementaries("(ab)(c*)"));
+ checkReplaceFirst("(?" + toSupplementaries("ab") +
+ ")(?" + toSupplementaries("c") + "*)",
+ toSupplementaries("abccczzzabcczzzabccc"),
+ "$",
+ toSupplementaries("abzzzabcczzzabccc"));
+
+
+ checkReplaceAll("(?" + toSupplementaries("ab") +
+ ")(?" + toSupplementaries("c") + "*)",
+ toSupplementaries("abccczzzabcczzzabccc"),
+ "$",
+ toSupplementaries("abzzzabzzzab"));
+
+ checkReplaceFirst("(?" + toSupplementaries("ab") +
+ ")(?" + toSupplementaries("c") + "*)",
+ toSupplementaries("abccczzzabcczzzabccc"),
+ "$",
+ toSupplementaries("ccczzzabcczzzabccc"));
+
+
+ checkReplaceAll("(?" + toSupplementaries("ab") +
+ ")(?" + toSupplementaries("c") + "*)",
+ toSupplementaries("abccczzzabcczzzabccc"),
+ "$",
+ toSupplementaries("ccczzzcczzzccc"));
+
+ checkReplaceFirst("(?Dog)AndCat",
+ "zzzDogAndCatzzzDogAndCatzzz",
+ "$",
+ "zzzDogzzzDogAndCatzzz");
+
+
+ checkReplaceAll("(?Dog)AndCat",
+ "zzzDogAndCatzzzDogAndCatzzz",
+ "$",
+ "zzzDogzzzDogzzz");
+
+ // backref in Matcher & String
+ if (!"abcdefghij".replaceFirst("cd(?ef)gh", "$").equals("abefij") ||
+ !"abbbcbdbefgh".replaceAll("(?[a-e])b", "$").equals("abcdefgh"))
+ failCount++;
+
+ // negative
+ checkExpectedFail("(?abc)(def)");
+ checkExpectedFail("(?abc)(def)");
+ checkExpectedFail("(?abc)(def)\\k");
+ checkExpectedFail("(?abc)(?def)\\k");
+ checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"),
+ "gnameX");
+ checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"),
+ null);
+ report("NamedGroupCapture");
+ }
+}
diff --git a/jdk/test/java/util/regex/SupplementaryTestCases.txt b/jdk/test/java/util/regex/SupplementaryTestCases.txt
new file mode 100644
index 00000000000..2f05d4fed46
--- /dev/null
+++ b/jdk/test/java/util/regex/SupplementaryTestCases.txt
@@ -0,0 +1,1434 @@
+//
+// Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+// --------------------------------------------------------
+// This file contains test cases with supplementary characters for regular expressions.
+// A test case consists of three lines:
+// The first line is a pattern used in the test
+// The second line is the input to search for the pattern in
+// The third line is a concatenation of the match, the number of groups,
+// and the contents of the first four subexpressions.
+// Empty lines and lines beginning with comment slashes are ignored.
+
+// Test unsetting of backed off groups
+^(\ud800\udc61)?\ud800\udc61
+\ud800\udc61
+true \ud800\udc61 1
+
+^(\ud800\udc61\ud800)?\ud800\udc61\ud800
+\ud800\udc61\ud800
+true \ud800\udc61\ud800 1
+
+^(\ud800\udc61\ud800\udc61(\ud800\udc62\ud800\udc62)?)+$
+\ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 2 \ud800\udc61\ud800\udc61 \ud800\udc62\ud800\udc62
+
+^(\ud800\udc61\ud800\udc61\ud800(\ud800\udc62\ud800\udc62\ud800)?)+$
+\ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800
+true \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 2 \ud800\udc61\ud800\udc61\ud800 \ud800\udc62\ud800\udc62\ud800
+
+((\ud800\udc61|\ud800\udc62)?\ud800\udc62)+
+\ud800\udc62
+true \ud800\udc62 2 \ud800\udc62
+
+((\ud800|\ud800\udc62)?\ud800\udc62)+
+\ud800\udc62
+true \ud800\udc62 2 \ud800\udc62
+
+(\ud800\udc61\ud800\udc61\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\udc61
+\ud800\udc61\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61\ud800\udc61 1
+
+(\ud800\udc61\ud800\udc61\ud800\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\ud800\udc61
+\ud800\udc61\ud800\udc61\ud800\ud800\udc61
+true \ud800\udc61\ud800\udc61\ud800\ud800\udc61 1
+
+^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$
+\ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800
+true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800
+
+^(\ud800\udc61(\ud800\udc62)?)+$
+\ud800\udc61\ud800\udc62\ud800\udc61
+true \ud800\udc61\ud800\udc62\ud800\udc61 2 \ud800\udc61 \ud800\udc62
+
+^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$
+\ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800
+true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800
+
+^(\ud800\udc61(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\udc62\ud800\udc63
+\ud800\udc61\ud800\udc62\ud800\udc63
+true \ud800\udc61\ud800\udc62\ud800\udc63 3
+
+^(\ud800\udc61\ud800(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\ud800\udc62\ud800\udc63
+\ud800\udc61\ud800\ud800\udc62\ud800\udc63
+true \ud800\udc61\ud800\ud800\udc62\ud800\udc63 3
+
+^(\ud800\udc61(\ud800\udc02(\ud800\udc63))).*
+\ud800\udc61\ud800\udc02\ud800\udc63
+true \ud800\udc61\ud800\udc02\ud800\udc63 3 \ud800\udc61\ud800\udc02\ud800\udc63 \ud800\udc02\ud800\udc63 \ud800\udc63
+
+^(\ud800\udc61(\ud800(\ud800\udc63))).*
+\ud800\udc61\ud800\ud800\udc63
+true \ud800\udc61\ud800\ud800\udc63 3 \ud800\udc61\ud800\ud800\udc63 \ud800\ud800\udc63 \ud800\udc63
+
+// Patterns including no surrogates
+(.)([^a])xyz
+\ud801\ud800\udc00xyz
+true \ud801\ud800\udc00xyz 2 \ud801 \ud800\udc00
+
+[^a-z]..
+\ud801\ud800\udc00xyz
+true \ud801\ud800\udc00x 0
+
+.$
+\ud801\ud800\udc00
+true \ud800\udc00 0
+
+.$
+\ud801\udc01\ud800\udc00
+true \ud800\udc00 0
+
+.$
+\ud801\udc01\ud800\udc00\udcff
+true \udcff 0
+
+[^x-\uffff][^y-\uffff]
+\ud800\udc00pqr
+true \ud800\udc00p 0
+
+[^x-\uffff]+
+\ud800\udc00pqrx
+true \ud800\udc00pqr 0
+
+/// The following test cases fail due to use of Start rather than
+/// StartS. Disabled for now.
+///[a-\uffff]
+///\ud800\udc00x
+///true x 0
+///
+///[a-\uffff]
+///\ud800\udc00
+///false 0
+
+// use of x modifier
+\ud800\udc61bc(?x)bl\ud800\udc61h
+\ud800\udc61bcbl\ud800\udc61h
+true \ud800\udc61bcbl\ud800\udc61h 0
+
+\ud800\udc61bc(?x) bl\ud800\udc61h
+\ud800\udc61bcbl\ud800\udc61h
+true \ud800\udc61bcbl\ud800\udc61h 0
+
+\ud800\udc61bc(?x) bl\ud800\udc61h blech
+\ud800\udc61bcbl\ud800\udc61hblech
+true \ud800\udc61bcbl\ud800\udc61hblech 0
+
+\ud800\udc61bc(?x) bl\ud800\udc61h # ignore comment
+\ud800\udc61bcbl\ud800\udc61h
+true \ud800\udc61bcbl\ud800\udc61h 0
+
+// Simple alternation
+\ud800\udc61|\ud800\udc62
+\ud800\udc61
+true \ud800\udc61 0
+
+\ud800\udc61|\ud800\udc62|\ud800
+\ud800\udc61
+true \ud800\udc61 0
+
+\ud800\udc61|\ud800
+\ud800\udc62
+false 0
+
+\ud800\udc62|\ud800
+\ud800
+true \ud800 0
+
+\ud800\udc61|\ud802\udc02
+z
+false 0
+
+\ud800\udc61|\ud802\udc02
+\ud802\udc02
+true \ud802\udc02 0
+
+\ud800\udc61|\ud802\udc02|\ud803\udc03\ud804\udc04
+\ud803\udc03\ud804\udc04
+true \ud803\udc03\ud804\udc04 0
+
+\ud800\udc61|\ud800\udc61d
+\ud800\udc61d
+true \ud800\udc61 0
+
+z(\ud800\udc61|\ud800\udc61c)\ud802\udc02
+z\ud800\udc61c\ud802\udc02
+true z\ud800\udc61c\ud802\udc02 1 \ud800\udc61c
+
+z(\ud800\udc61|\ud800\udc61c|\udc61c)\ud802\udc02
+z\udc61c\ud802\udc02
+true z\udc61c\ud802\udc02 1 \udc61c
+
+// Simple codepoint class
+[\ud800\udc61\ud802\udc02c]+
+\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
+true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
+
+[\ud800\udc61\ud802\udc02c]+
+\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
+true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
+
+[\ud800\udc61\ud802\udc02c\ud800]+
+\ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
+true \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
+
+[\ud800\udc61bc]+
+d\ud800\udc62fg
+false 0
+
+[\ud800\udc61bc]+[\ud804\udc04ef]+[\ud807\udc07hi]+
+zzz\ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07zzz
+true \ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07 0
+
+// Range codepoint class
+[\ud801\udc01-\ud807\udc07]+
+\ud8ff\udcff\ud8ff\udcff\ud8ff\udcff\ud807\udc07\ud807\udc07\ud807\udc07
+true \ud807\udc07\ud807\udc07\ud807\udc07 0
+
+[\ud801\udc01-\ud807\udc07]+
+mmm
+false 0
+
+[\ud800\udc61-]+
+z\ud800\udc61-9z
+true \ud800\udc61- 0
+
+// Negated char class
+[^\ud800\udc61\ud802\udc02c]+
+\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
+false 0
+
+[^\ud800\udc61\ud802\udc02\ud803\udc03]+
+\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg
+true \ud804\udc04efg 0
+
+[^\ud800\udc61\ud802\udc02\ud803\udc03\ud800]+
+\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg
+true \ud804\udc04efg 0
+
+// Making sure a ^ not in first position matches literal ^
+[\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02]
+\ud802\udc02
+true \ud802\udc02 0
+
+[\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02]
+^
+true ^ 0
+
+// Class union and intersection
+[\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
+\ud802\udc02
+true \ud802\udc02 0
+
+[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
+\ud805\udc05
+true \ud805\udc05 0
+
+[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud801\udc01
+true \ud801\udc01 0
+
+[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud80c\udc0c
+true \ud80c\udc0c 0
+
+[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
+4
+true 4 0
+
+[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud805\udc05
+false 0
+
+[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud816\udc16
+false 0
+
+[[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud802\udc02
+true \ud802\udc02 0
+
+[[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]]
+\ud81a\udc1a
+false 0
+
+[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
+\ud801\udc01
+true \ud801\udc01 0
+
+[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
+\ud805\udc05
+true \ud805\udc05 0
+
+[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
+\ud808\udc08
+true \ud808\udc08 0
+
+[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
+\ud80d\udc0d
+false 0
+
+[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]\ud80d\udc0d]
+\ud80d\udc0d
+true \ud80d\udc0d 0
+
+[\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud801\udc01
+true \ud801\udc01 0
+
+[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud804\udc04
+true \ud804\udc04 0
+
+[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud808\udc08
+true \ud808\udc08 0
+
+[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud816\udc16
+false 0
+
+[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
+\ud801\udc01
+false 0
+
+[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
+\ud805\udc05
+false 0
+
+[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
+\ud81a\udc1a
+false 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
+\ud801\udc01
+false 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
+\ud805\udc05
+false 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
+\ud81a\udc1a
+false 0
+
+[\ud801\udc01-\ud803\udc03&&\ud804\udc04-\ud806\udc06]
+\ud801\udc01
+false 0
+
+[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a]
+\ud80d\udc0d
+true \ud80d\udc0d 0
+
+[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud803\udc03]
+\ud80d\udc0d
+false 0
+
+[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud81a\udc1a]
+\ud80d\udc0d
+true \ud80d\udc0d 0
+
+[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
+\ud801\udc01
+false 0
+
+[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
+\ud80d\udc0d
+true \ud80d\udc0d 0
+
+[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
+\ud81a\udc1a
+false 0
+
+[[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]]
+\ud801\udc01
+false 0
+
+[[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]]
+\ud804\udc04
+true \ud804\udc04 0
+
+[\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]]
+\ud801\udc01
+false 0
+
+[\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]]
+\ud804\udc04
+true \ud804\udc04 0
+
+[\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]]
+\ud801\udc01
+false 0
+
+[\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]]
+\ud805\udc05
+true \ud805\udc05 0
+
+[[\ud801\udc01-\ud803\udc03]&&\ud804\udc04-\ud806\udc06\ud801\udc01-\ud803\udc03]
+\ud801\udc01
+true \ud801\udc01 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06][\ud801\udc01-\ud803\udc03]]
+\ud801\udc01
+true \ud801\udc01 0
+
+[[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03]
+\ud801\udc01
+true \ud801\udc01 0
+
+[[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
+\ud805\udc05
+true \ud805\udc05 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]]
+\ud801\udc01
+false 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]]
+\ud803\udc03
+true \ud803\udc03 0
+
+[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04][\ud803\udc03-\ud805\udc05]&&[\ud815\udc15-\ud81a\udc1a]]
+\ud803\udc03
+false 0
+
+[\ud801\udc01\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]]
+\ud801\udc01
+true \ud801\udc01 0
+
+[\ud800\udc61\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]]
+\ud804\udc04
+false 0
+
+[\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud802\udc02
+true \ud802\udc02 0
+
+[\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09]
+\ud807\udc07
+false 0
+
+[[\ud801\udc01[\ud802\udc02]]&&[\ud802\udc02[\ud801\udc01]]]
+\ud801\udc01
+true \ud801\udc01 0
+
+// Unicode isn't supported in clazz()
+[[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]]
+\ud800\udc61
+true \ud800\udc61 0
+
+[[\ud800\udc61]&&[\ud802\udc02][\ud800][\ud800\udc61]&&[^\ud804\udc04]]
+\ud800\udc61
+true \ud800\udc61 0
+
+[[\ud800\udc61]&&[b][\ud800][\ud800\udc61]&&[^\ud804\udc04]]
+\ud804\udc04
+false 0
+
+[[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]]
+d
+false 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]]
+\ud800\udc01
+false 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]]
+\ud800\udc03
+true \ud800\udc03 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]]
+\ud800\udc03
+true \ud800\udc03 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03]
+\ud800\udc03
+true \ud800\udc03 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&\ud800\udc03]
+\ud800\udc03
+true \ud800\udc03 0
+
+[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&[\ud800\udc03\ud800\udc04\ud800\udc05]]
+\ud800\udc03
+true \ud800\udc03 0
+
+[z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]]
+\ud800\udc03
+true \ud800\udc03 0
+
+[z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]&&[u-z]]
+z
+true z 0
+
+[x[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]]
+z
+false 0
+
+[x[[wz]\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]]
+z
+true z 0
+
+[[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]\ud800\udc61b\ud800\udc03]
+\ud800\udc61
+true \ud800\udc61 0
+
+[[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]xyz[\ud800\udc61b\ud800\udc03]]
+\ud800\udc61
+true \ud800\udc61 0
+
+\pL
+\ud800\udc00
+true \ud800\udc00 0
+
+\p{IsASCII}
+\ud800\udc00
+false 0
+
+\pLbc
+\ud800\udc00bc
+true \ud800\udc00bc 0
+
+\ud800\udc61[r\p{InGreek}]c
+\ud800\udc61\u0370c
+true \ud800\udc61\u0370c 0
+
+\ud800\udc61\p{InGreek}
+\ud800\udc61\u0370
+true \ud800\udc61\u0370 0
+
+\ud800\udc61\P{InGreek}
+\ud800\udc61\u0370
+false 0
+
+\ud800\udc61\P{InGreek}
+\ud800\udc61b
+true \ud800\udc61b 0
+
+\ud800\udc61{^InGreek}
+-
+error
+
+\ud800\udc61\p{^InGreek}
+-
+error
+
+\ud800\udc61\P{^InGreek}
+-
+error
+
+\ud800\udc61\p{InGreek}
+\ud800\udc61\u0370
+true \ud800\udc61\u0370 0
+
+\ud800\udc61[\p{InGreek}]c
+\ud800\udc61\u0370c
+true \ud800\udc61\u0370c 0
+
+\ud800\udc61[\P{InGreek}]c
+\ud800\udc61\u0370c
+false 0
+
+\ud800\udc61[\P{InGreek}]c
+\ud800\udc61bc
+true \ud800\udc61bc 0
+
+\ud800\udc61[{^InGreek}]c
+\ud800\udc61nc
+true \ud800\udc61nc 0
+
+\ud800\udc61[{^InGreek}]c
+\ud800\udc61zc
+false 0
+
+\ud800\udc61[\p{^InGreek}]c
+-
+error
+
+\ud800\udc61[\P{^InGreek}]c
+-
+error
+
+\ud800\udc61[\p{InGreek}]
+\ud800\udc61\u0370
+true \ud800\udc61\u0370 0
+
+\ud800\udc61[r\p{InGreek}]c
+\ud800\udc61rc
+true \ud800\udc61rc 0
+
+\ud800\udc61[\p{InGreek}r]c
+\ud800\udc61rc
+true \ud800\udc61rc 0
+
+\ud800\udc61[r\p{InGreek}]c
+\ud800\udc61rc
+true \ud800\udc61rc 0
+
+\ud800\udc61[^\p{InGreek}]c
+\ud800\udc61\u0370c
+false 0
+
+\ud800\udc61[^\P{InGreek}]c
+\ud800\udc61\u0370c
+true \ud800\udc61\u0370c 0
+
+\ud800\udc61[\p{InGreek}&&[^\u0370]]c
+\ud800\udc61\u0370c
+false 0
+
+// Test the dot metacharacter
+\ud800\udc61.c.+
+\ud800\udc61#c%&
+true \ud800\udc61#c%& 0
+
+\ud800\udc61b.
+\ud800\udc61b\n
+false 0
+
+(?s)\ud800\udc61b.
+\ud800\udc61b\n
+true \ud800\udc61b\n 0
+
+\ud800\udc61[\p{L}&&[\P{InGreek}]]c
+\ud800\udc61\u6000c
+true \ud800\udc61\u6000c 0
+
+\ud800\udc61[\p{L}&&[\P{InGreek}]]c
+\ud800\udc61rc
+true \ud800\udc61rc 0
+
+\ud800\udc61[\p{L}&&[\P{InGreek}]]c
+\ud800\udc61\u0370c
+false 0
+
+\ud800\udc61\p{InGreek}c
+\ud800\udc61\u0370c
+true \ud800\udc61\u0370c 0
+
+\ud800\udc61\p{Sc}
+\ud800\udc61$
+true \ud800\udc61$ 0
+
+// Test \p{L}
+\p{L}
+\ud800\udf1e
+true \ud800\udf1e 0
+
+^a\p{L}z$
+a\ud800\udf1ez
+true a\ud800\udf1ez 0
+
+// Test \P{InDeseret}
+
+\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret}
+\ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00
+true \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 0
+
+\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret}
+\ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00
+true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 0
+
+// Test \p{InDeseret}
+\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\p{InDeseret}
+\ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00
+true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 0
+
+// Test the word char escape sequence
+\ud800\udc61b\wc
+\ud800\udc61bcc
+true \ud800\udc61bcc 0
+
+\ud800\udc61bc[\w]
+\ud800\udc61bcd
+true \ud800\udc61bcd 0
+
+\ud800\udc61bc[\sdef]*
+\ud800\udc61bc def
+true \ud800\udc61bc def 0
+
+\ud800\udc61bc[\sy-z]*
+\ud800\udc61bc y z
+true \ud800\udc61bc y z 0
+
+\ud800\udc01bc[\ud800\udc01-\ud800\udc04\sm-p]*
+\ud800\udc01bc\ud800\udc01\ud800\udc01 mn p
+true \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p 0
+
+// Test the whitespace escape sequence
+\ud800\udc61b\s\ud800\udc03
+\ud800\udc61b \ud800\udc03
+true \ud800\udc61b \ud800\udc03 0
+
+\s\s\s
+bl\ud800\udc61h err
+false 0
+
+\S\S\s
+bl\ud800\udc61h err
+true \ud800\udc61h 0
+
+// Test the digit escape sequence
+\ud800\udc61b\d\ud800\udc03
+\ud800\udc61b9\ud800\udc03
+true \ud800\udc61b9\ud800\udc03 0
+
+\d\d\d
+bl\ud800\udc61h45
+false 0
+
+// Test the caret metacharacter
+^\ud800\udc61bc
+\ud800\udc61bcdef
+true \ud800\udc61bc 0
+
+^\ud800\udc61bc
+bcd\ud800\udc61bc
+false 0
+
+// Greedy ? metacharacter
+\ud800\udc61?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+\udc61?\ud800\udc02
+\ud800\udc61\udc61\udc61\ud800\udc02
+true \udc61\ud800\udc02 0
+
+\ud800\udc61?\ud800\udc02
+\ud800\udc02
+true \ud800\udc02 0
+
+\ud800?\ud800\udc02
+\ud800\udc02
+true \ud800\udc02 0
+
+\ud800\udc61?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc03\ud800\udc03\ud800\udc03
+false 0
+
+.?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+// Reluctant ? metacharacter
+\ud800\udc61??\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+\ud800??\ud800\udc02
+\ud800\ud800\ud8001\ud800\ud800\udc02
+true \ud800\ud800\udc02 0
+
+\ud800\udc61??\ud800\udc02
+\ud800\udc02
+true \ud800\udc02 0
+
+\ud800??\ud800\udc02
+\ud800\udc02
+true \ud800\udc02 0
+
+\ud800\udc61??\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61ccc
+false 0
+
+.??\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+// Possessive ? metacharacter
+\ud800\udc61?+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+\ud800\udc61?+\ud800\udc02
+\ud800\udc02
+true \ud800\udc02 0
+
+\ud800\udc61?+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61ccc
+false 0
+
+.?+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc02 0
+
+// Greedy + metacharacter
+\ud800\udc61+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
+
+\udc61+\ud800\udc02
+\ud800\udc61\udc61\udc61\udc61\ud800\udc02
+true \udc61\udc61\udc61\ud800\udc02 0
+
+\ud800\udc61+\ud800\udc02
+\ud800\udc02
+false 0
+
+\ud800+\ud800\udc02
+\ud800\udc02
+false 0
+
+\ud800\udc61+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61ccc
+false 0
+
+.+\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
+
+.+\ud800\udc02
+\ud800\udc61\udc61\udc61\udc61\ud800\udc02
+true \ud800\udc61\udc61\udc61\udc61\ud800\udc02 0
+
+// Reluctant + metacharacter
+\ud800\udc61+?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
+
+\udc61+?\ud800\udc02
+\udc61\udc61\udc61\udc61\ud800\udc02
+true \udc61\udc61\udc61\udc61\ud800\udc02 0
+
+\ud800\udc61+?\ud800\udc02
+\ud800\udc02
+false 0
+
+\ud800+?\ud800\udc02
+\ud800\udc02
+false 0
+
+\ud800\udc61+?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61ccc
+false 0
+
+.+?\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
+
+// Possessive + metacharacter
+\ud800\udc61++\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
+
+\ud800\udc61++\ud800\udc02
+\ud800\udc02
+false 0
+
+\ud800\udc61++\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61ccc
+false 0
+
+.++\ud800\udc02
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
+false 0
+
+// Greedy Repetition
+\ud800\udc61{2,3}
+\ud800\udc61
+false 0
+
+\ud800\udc61{2,3}
+\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{2,3}
+\ud800\udc61\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{2,3}
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{3,}
+zzz\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61zzz
+true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{3,}
+zzz\ud800\udc61\ud800\udc61zzz
+false 0
+
+// Reluctant Repetition
+\ud800\udc61{2,3}?
+\ud800\udc61
+false 0
+
+\ud800\udc61{2,3}?
+\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{2,3}?
+\ud800\udc61\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61 0
+
+\ud800\udc61{2,3}?
+\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61
+true \ud800\udc61\ud800\udc61 0
+
+// Zero width Positive lookahead
+\ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04)
+zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04
+true \ud800\udc61\ud802\udc02\ud803\udc03 0
+
+\ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04)
+zzz\ud800\udc61\ud802\udc02\ud803\udc03e\ud804\udc04
+false 0
+
+\ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04)
+zzz\ud800\udc61\ud802\udc02\ud803\udc03\udcff\ud804\udc04
+true \ud800\udc61\ud802\udc02\ud803\udc03 0
+
+\ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04)
+zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud8ff\udcff\ud804\udc04
+false 0
+
+// Zero width Negative lookahead
+\ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04)
+zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04
+false 0
+
+a\ud802\udc02\ud803\udc03(?!\ud804\udc04)
+zza\ud802\udc02\ud803\udc03\udc04\ud804\udc04
+true a\ud802\udc02\ud803\udc03 0
+
+\ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff)
+zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04\ud8ffX
+false 0
+
+a\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff)
+zza\ud802\udc02\ud803\udc03e\ud804\udc04\ud8ff\udcff
+true a\ud802\udc02\ud803\udc03 0
+
+// Zero width Positive lookbehind
+(?<=\ud801\udc01\ud802\udc02)\ud803\udc03
+\ud801\udc01\ud802\udc02\ud803\udc03
+true \ud803\udc03 0
+
+// Zero width Negative lookbehind
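+// NOTE(review): the negative-lookbehind cases (and any sections that followed them) appear to have been lost from this copy -- restore them from the original file.
+// Disable metacharacters- test both length <=3 and >3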
+// So that the BM optimization is part of test
+\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
+***\ud801\udc01\ud802\udc02\ud800\udc03
+true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\ud802\udc02l\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
+\ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03
+true \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\Q***\ud801\udc01\ud802\udc02\ud800\udc03
+***\ud801\udc01\ud802\udc02\ud800\udc03
+true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\ud802\udc02l\ud801\udc01h\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
+\ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03
+true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\Q***\ud801\udc01\ud802\udc02\ud800\udc03
+***\ud801\udc01\ud802\udc02\ud800\udc03
+true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\Q*\ud801\udc01\ud802\udc02
+*\ud801\udc01\ud802\udc02
+true *\ud801\udc01\ud802\udc02 0
+
+\ud802\udc02l\ud801\udc01h\Q***\ud801\udc01\ud802\udc02\ud800\udc03
+\ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03
+true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+\ud802\udc02l\ud801\udc01\Q***\ud801\udc01\ud802\udc02\ud800\udc03
+\ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03
+true \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 0
+
+//Test cases below copied from i18n QE's RegexSupplementaryTests.txt
+\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
+\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
+true \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 0
+
+\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
+\u1000\uD801\uDFF1\uDB00\uDC00
+false 0
+
+\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
+\uD800\uDFFF\uFFFF\uDB00\uDC00
+false 0
+
+\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
+\uD800\uDFFF\uD801\uDFF1\uFFFF
+false 0
+
+\u1000.\uFFFF
+\u1000\uD800\uDFFF\uFFFF
+true \u1000\uD800\uDFFF\uFFFF 0
+
+//=======
+// Ranges
+//=======
+[a-\uD800\uDFFF]
+\uDFFF
+true \uDFFF 0
+
+[a-\uD800\uDFFF]
+\uD800
+true \uD800 0
+
+[a-\uD800\uDFFF]
+\uD800\uDFFF
+true \uD800\uDFFF 0
+
+[\uD800\uDC00-\uDBFF\uDFFF]
+\uDBFF
+false 0
+
+[\uD800\uDC00-\uDBFF\uDFFF]
+\uDC00
+false 0
+
+[\uD800-\uDFFF]
+\uD800\uDFFF
+false 0
+
+[\uD800-\uDFFF]
+\uDFFF\uD800
+true \uDFFF 0
+
+foo[^\uD800-\uDFFF]
+foo\uD800\uDFFF
+true foo\uD800\uDFFF 0
+
+foo[^\uD800-\uDFFF]
+foo\uDFFF\uD800
+false 0
+
+//fo\uD800[\uDC00-\uDFFF]
+
+//==================
+// Character Classes
+//==================
+// Simple class
+[ab\uD800\uDFFFcd]at
+\uD800at
+false 0
+
+[ab\uD800\uDFFFcd]at
+\uD800\uDFFFat
+true \uD800\uDFFFat 0
+
+// Negation
+[^\uD800\uDFFFcd]at
+\uD800at
+true \uD800at 0
+
+[^\uD800\uDFFFcd]at
+\uDFFFat
+true \uDFFFat 0
+
+// Inclusive range
+[\u0000-\uD800\uDFFF-\uFFFF]
+\uD800\uDFFF
+true \uD800\uDFFF 0
+
+// Unions
+[\u0000-\uD800[\uDFFF-\uFFFF]]
+\uD800\uDFFF
+false 0
+
+
+// Intersection
+[\u0000-\uFFFF&&[\uD800\uDFFF]]
+\uD800\uDFFF
+false 0
+
+[\u0000-\uFFFF&&[\uD800\uDFFF]]
+\uD800
+false 0
+
+[\u0000-\uFFFF&&[\uDFFF\uD800]]
+\uD800
+true \uD800 0
+
+[\u0000-\uFFFF&&[\uDFFF\uD800\uDC00]]
+\uDC00
+false 0
+
+[\u0000-\uDFFF&&[\uD800-\uFFFF]]
+\uD800\uDFFF
+false 0
+
+[\u0000-\uDFFF&&[\uD800-\uFFFF]]
+\uDFFF\uD800
+true \uDFFF 0
+
+// Subtraction
+[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
+\uD800
+true \uD800 0
+
+[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
+\uDC00
+true \uDC00 0
+
+[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
+\uD800\uDFFF
+true \uD800\uDFFF 0
+
+[\u0000-\uD800\uDFFF&&[^\uD800\uDBFF\uDC00]]
+\uD800
+false 0
+
+[\u0000-\uD800\uDFFF&&[^\uDC00\uD800\uDBFF]]
+\uD800\uDC00
+true \uD800\uDC00 0
+
+// Quantifiers
+a\uD800\uDFFF?
+a\uD800
+true a 0
+
+a\uD800\uDFFF?
+a\uDFFF
+true a 0
+
+a\uD800\uDFFF?
+a\uD800\uDFFF
+true a\uD800\uDFFF 0
+
+a\uDFFF\uD800?
+a\uDFFF
+true a\uDFFF 0
+
+a\uDFFF\uD800?
+a\uD800
+false 0
+
+\uD800\uDFFF\uDC00?
+\uD800
+false 0
+
+\uD800\uDFFF\uDC00?
+\uD800\uDFFF
+true \uD800\uDFFF 0
+
+a\uD800\uDFFF??
+a\uDFFF
+true a 0
+
+a\uD800\uDFFF*
+a
+true a 0
+
+a\uD800\uDFFF*
+a\uD800
+true a 0
+
+\uD800\uDFFF*
+\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+\uD800\uDFFF*
+\uD800\uDFFF\uDFFF\uDFFF\uDFFF
+true \uD800\uDFFF 0
+
+\uD800*\uDFFF
+\uD800\uDFFF
+false 0
+
+a\uD800\uDFFF*
+a\uD800
+true a 0
+
+\uDFFF\uD800*
+\uDFFF
+true \uDFFF 0
+
+\uDFFF\uD800*
+\uDFFF\uD800\uD800\uD800
+true \uDFFF\uD800\uD800\uD800 0
+
+\uD800\uDFFF+
+\uD800\uDFFF\uDFFF\uDFFF
+true \uD800\uDFFF 0
+
+\uD800\uDFFF+
+\uD800
+false 0
+
+\uD800\uDFFF+
+\uD800\uDFFF
+true \uD800\uDFFF 0
+
+\uD800\uDFFF+
+\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+\uDFFF\uD800+
+\uDFFF\uD800\uDFFF\uD800
+false 0
+
+\uD800+\uDFFF
+\uD800\uDFFF
+false 0
+
+\uD800+\uDFFF
+\uD800
+false 0
+
+\uDFFF+\uD800
+\uD800
+false 0
+
+\uDFFF+\uD800
+\uDFFF\uD800
+true \uDFFF\uD800 0
+
+\uD800\uDFFF{3}
+\uD800\uDFFF\uDFFF\uDFFF
+false 0
+
+\uD800\uDFFF{3}
+\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+\uDFFF\uD800{3}
+\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800
+false 0
+
+\uDFFF\uD800{3}
+\uDFFF\uD800\uD800\uD800
+true \uDFFF\uD800\uD800\uD800 0
+
+\uD800\uDFFF{2,}
+\uD800\uDFFF
+false 0
+
+\uD800\uDFFF{2,}
+\uD800\uDFFF\uDFFF
+false 0
+
+\uD800\uDFFF{2,}
+\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF 0
+
+\uDFFF\uD800{2,}
+\uDFFF\uD800\uDFFF\uD800
+false 0
+
+\uDFFF\uD800{2,}
+\uDFFF\uD800\uD800\uD800
+true \uDFFF\uD800\uD800\uD800 0
+
+\uD800\uDFFF{3,4}
+\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+\uD800\uDFFF{3,4}
+\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+\uD800\uDFFF{3,4}
+\uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF
+false 0
+
+\uDFFF\uD800{3,5}
+\uDFFF\uD800\uD800\uD800\uD800\uD800\uD800\uD800
+true \uDFFF\uD800\uD800\uD800\uD800\uD800 0
+
+\uD800\uDFFF{3,5}
+\uD800\uDFFF\uDFFF\uDFFF
+false 0
+
+\uD800\uDFFF{3,5}
+\uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
+true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
+
+// Groupings
+(\uD800(\uDFFF))
+\uD800\uDFFF
+false 2
+
+(\uD800(\uDC00)(\uDFFF))
+\uD800\uDC00\uDFFF
+false 3
+
+((\uD800)(\uDFFF))
+\uD800\uDFFF
+false 3
+
+(\uD800(\uDFFF)\uDFFF)
+\uD800\uDFFF
+false 2
+
+(\uDFFF(\uD800)(\uDBFF))
+\uDFFF\uD800\uDBFF
+true \uDFFF\uD800\uDBFF 3 \uDFFF\uD800\uDBFF \uD800 \uDBFF
+
+(\uDFFF(\uD800)(\uDC00))
+\uDFFF\uD800\uDC00
+false 3
+
+(\uDFFF\uD800(\uDC00\uDBFF))
+\uDFFF\uD800\uDC00\uDBFF
+false 2
+
+(\uD800\uDFFF(\uDBFF)(\uDC00))
+\uD800\uDFFF\uDBFF\uDC00
+false 3
+
+(\uD800\uDFFF(\uDBFF\uDC00))
+\uD800\uDFFF\uDBFF\uDC00
+true \uD800\uDFFF\uDBFF\uDC00 2 \uD800\uDFFF\uDBFF\uDC00 \uDBFF\uDC00
diff --git a/jdk/test/java/util/regex/TestCases.txt b/jdk/test/java/util/regex/TestCases.txt
new file mode 100644
index 00000000000..fd41df33f76
--- /dev/null
+++ b/jdk/test/java/util/regex/TestCases.txt
@@ -0,0 +1,1092 @@
+//
+// Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+// This file contains test cases for regular expressions.
+// A test case consists of three lines:
+// The first line is a pattern used in the test
+// The second line is the input to search for the pattern in
+// The third line is a concatenation of the match, the number of groups,
+// and the contents of the first four subexpressions.
+// Empty lines and lines beginning with comment slashes are ignored.
+//
+// Test unsetting of backed off groups
+^(a)?a
+a
+true a 1
+
+^(aa(bb)?)+$
+aabbaa
+true aabbaa 2 aa bb
+
+((a|b)?b)+
+b
+true b 2 b
+
+(aaa)?aaa
+aaa
+true aaa 1
+
+^(a(b)?)+$
+aba
+true aba 2 a b
+
+^(a(b(c)?)?)?abc
+abc
+true abc 3
+
+^(a(b(c))).*
+abc
+true abc 3 abc bc c
+
+// use of x modifier
+abc(?x)blah
+abcblah
+true abcblah 0
+
+abc(?x) blah
+abcblah
+true abcblah 0
+
+abc(?x) blah blech
+abcblahblech
+true abcblahblech 0
+
+abc(?x) blah # ignore comment
+abcblah
+true abcblah 0
+
+// Simple alternation
+a|b
+a
+true a 0
+
+a|b
+z
+false 0
+
+a|b
+b
+true b 0
+
+a|b|cd
+cd
+true cd 0
+
+a|ad
+ad
+true a 0
+
+z(a|ac)b
+zacb
+true zacb 1 ac
+
+// Simple char class
+[abc]+
+ababab
+true ababab 0
+
+[abc]+
+defg
+false 0
+
+[abc]+[def]+[ghi]+
+zzzaaddggzzz
+true aaddgg 0
+
+// Range char class
+[a-g]+
+zzzggg
+true ggg 0
+
+[a-g]+
+mmm
+false 0
+
+[a-]+
+za-9z
+true a- 0
+
+[a-\\u4444]+
+za-9z
+true za 0
+
+// Negated char class
+[^abc]+
+ababab
+false 0
+
+[^abc]+
+aaabbbcccdefg
+true defg 0
+
+// Making sure a ^ not in first position matches literal ^
+[abc^b]
+b
+true b 0
+
+[abc^b]
+^
+true ^ 0
+
+// Class union and intersection
+[abc[def]]
+b
+true b 0
+
+[abc[def]]
+e
+true e 0
+
+[a-d[0-9][m-p]]
+a
+true a 0
+
+[a-d[0-9][m-p]]
+o
+true o 0
+
+[a-d[0-9][m-p]]
+4
+true 4 0
+
+[a-d[0-9][m-p]]
+e
+false 0
+
+[a-d[0-9][m-p]]
+u
+false 0
+
+[[a-d][0-9][m-p]]
+b
+true b 0
+
+[[a-d][0-9][m-p]]
+z
+false 0
+
+[a-c[d-f[g-i]]]
+a
+true a 0
+
+[a-c[d-f[g-i]]]
+e
+true e 0
+
+[a-c[d-f[g-i]]]
+h
+true h 0
+
+[a-c[d-f[g-i]]]
+m
+false 0
+
+[a-c[d-f[g-i]]m]
+m
+true m 0
+
+[abc[def]ghi]
+a
+true a 0
+
+[abc[def]ghi]
+d
+true d 0
+
+[abc[def]ghi]
+h
+true h 0
+
+[abc[def]ghi]
+w
+false 0
+
+[a-c&&[d-f]]
+a
+false 0
+
+[a-c&&[d-f]]
+e
+false 0
+
+[a-c&&[d-f]]
+z
+false 0
+
+[[a-c]&&[d-f]]
+a
+false 0
+
+[[a-c]&&[d-f]]
+e
+false 0
+
+[[a-c]&&[d-f]]
+z
+false 0
+
+[a-c&&d-f]
+a
+false 0
+
+[a-m&&m-z]
+m
+true m 0
+
+[a-m&&m-z&&a-c]
+m
+false 0
+
+[a-m&&m-z&&a-z]
+m
+true m 0
+
+[[a-m]&&[m-z]]
+a
+false 0
+
+[[a-m]&&[m-z]]
+m
+true m 0
+
+[[a-m]&&[m-z]]
+z
+false 0
+
+[[a-m]&&[^a-c]]
+a
+false 0
+
+[[a-m]&&[^a-c]]
+d
+true d 0
+
+[a-m&&[^a-c]]
+a
+false 0
+
+[a-m&&[^a-c]]
+d
+true d 0
+
+[a-cd-f&&[d-f]]
+a
+false 0
+
+[a-cd-f&&[d-f]]
+e
+true e 0
+
+[[a-c]&&d-fa-c]
+a
+true a 0
+
+[[a-c]&&[d-f][a-c]]
+a
+true a 0
+
+[[a-c][d-f]&&abc]
+a
+true a 0
+
+[[a-c][d-f]&&abc[def]]
+e
+true e 0
+
+[[a-c]&&[b-d]&&[c-e]]
+a
+false 0
+
+[[a-c]&&[b-d]&&[c-e]]
+c
+true c 0
+
+[[a-c]&&[b-d][c-e]&&[u-z]]
+c
+false 0
+
+[abc[^bcd]]
+a
+true a 0
+
+[abc[^bcd]]
+d
+false 0
+
+[a-c&&a-d&&a-eghi]
+b
+true b 0
+
+[a-c&&a-d&&a-eghi]
+g
+false 0
+
+[[a[b]]&&[b[a]]]
+a
+true a 0
+
+[[a]&&[b][c][a]&&[^d]]
+a
+true a 0
+
+[[a]&&[b][c][a]&&[^d]]
+d
+false 0
+
+[[[a-d]&&[c-f]]]
+a
+false 0
+
+[[[a-d]&&[c-f]]]
+c
+true c 0
+
+[[[a-d]&&[c-f]]&&[c]]
+c
+true c 0
+
+[[[a-d]&&[c-f]]&&[c]&&c]
+c
+true c 0
+
+[[[a-d]&&[c-f]]&&[c]&&c&&c]
+c
+true c 0
+
+[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
+c
+true c 0
+
+[z[abc&&bcd]]
+c
+true c 0
+
+[z[abc&&bcd]&&[u-z]]
+z
+true z 0
+
+[x[abc&&bcd[z]]&&[u-z]]
+z
+false 0
+
+[x[[wz]abc&&bcd[z]]&&[u-z]]
+z
+true z 0
+
+[[abc]&&[def]abc]
+a
+true a 0
+
+[[abc]&&[def]xyz[abc]]
+a
+true a 0
+
+\pL
+a
+true a 0
+
+\pL
+7
+false 0
+
+\p{L}
+a
+true a 0
+
+\p{LC}
+a
+true a 0
+
+\p{LC}
+A
+true A 0
+
+\p{IsL}
+a
+true a 0
+
+\p{IsLC}
+a
+true a 0
+
+\p{IsLC}
+A
+true A 0
+
+\p{IsLC}
+9
+false 0
+
+\P{IsLC}
+9
+true 9 0
+
+// Guillemet left is initial quote punctuation
+\p{Pi}
+\u00ab
+true \u00ab 0
+
+\P{Pi}
+\u00ac
+true \u00ac 0
+
+// Guillemet right is final quote punctuation
+\p{IsPf}
+\u00bb
+true \u00bb 0
+
+\p{P}
+\u00bb
+true \u00bb 0
+
+\p{P}+
+\u00bb
+true \u00bb 0
+
+\P{IsPf}
+\u00bc
+true \u00bc 0
+
+\P{IsP}
+\u00bc
+true \u00bc 0
+
+\p{L1}
+\u00bc
+true \u00bc 0
+
+\p{L1}+
+\u00bc
+true \u00bc 0
+
+\p{L1}
+\u02bc
+false 0
+
+\p{ASCII}
+a
+true a 0
+
+\p{IsASCII}
+a
+true a 0
+
+\p{IsASCII}
+\u0370
+false 0
+
+\pLbc
+abc
+true abc 0
+
+a[r\p{InGreek}]c
+a\u0370c
+true a\u0370c 0
+
+a\p{InGreek}
+a\u0370
+true a\u0370 0
+
+a\P{InGreek}
+a\u0370
+false 0
+
+a\P{InGreek}
+ab
+true ab 0
+
+a{^InGreek}
+-
+error
+
+a\p{^InGreek}
+-
+error
+
+a\P{^InGreek}
+-
+error
+
+a\p{InGreek}
+a\u0370
+true a\u0370 0
+
+a[\p{InGreek}]c
+a\u0370c
+true a\u0370c 0
+
+a[\P{InGreek}]c
+a\u0370c
+false 0
+
+a[\P{InGreek}]c
+abc
+true abc 0
+
+a[{^InGreek}]c
+anc
+true anc 0
+
+a[{^InGreek}]c
+azc
+false 0
+
+a[\p{^InGreek}]c
+-
+error
+
+a[\P{^InGreek}]c
+-
+error
+
+a[\p{InGreek}]
+a\u0370
+true a\u0370 0
+
+a[r\p{InGreek}]c
+arc
+true arc 0
+
+a[\p{InGreek}r]c
+arc
+true arc 0
+
+a[r\p{InGreek}]c
+arc
+true arc 0
+
+a[^\p{InGreek}]c
+a\u0370c
+false 0
+
+a[^\P{InGreek}]c
+a\u0370c
+true a\u0370c 0
+
+a[\p{InGreek}&&[^\u0370]]c
+a\u0370c
+false 0
+
+// Test the dot metacharacter
+a.c.+
+a#c%&
+true a#c%& 0
+
+ab.
+ab\n
+false 0
+
+(?s)ab.
+ab\n
+true ab\n 0
+
+a[\p{L}&&[\P{InGreek}]]c
+a\u6000c
+true a\u6000c 0
+
+a[\p{L}&&[\P{InGreek}]]c
+arc
+true arc 0
+
+a[\p{L}&&[\P{InGreek}]]c
+a\u0370c
+false 0
+
+a\p{InGreek}c
+a\u0370c
+true a\u0370c 0
+
+a\p{Sc}
+a$
+true a$ 0
+
+// Test the word char escape sequence
+ab\wc
+abcc
+true abcc 0
+
+\W\w\W
+#r#
+true #r# 0
+
+\W\w\W
+rrrr#ggg
+false 0
+
+abc[\w]
+abcd
+true abcd 0
+
+abc[\sdef]*
+abc def
+true abc def 0
+
+abc[\sy-z]*
+abc y z
+true abc y z 0
+
+abc[a-d\sm-p]*
+abcaa mn p
+true abcaa mn p 0
+
+// Test the whitespace escape sequence
+ab\sc
+ab c
+true ab c 0
+
+\s\s\s
+blah err
+false 0
+
+\S\S\s
+blah err
+true ah 0
+
+// Test the digit escape sequence
+ab\dc
+ab9c
+true ab9c 0
+
+\d\d\d
+blah45
+false 0
+
+// Test the caret metacharacter
+^abc
+abcdef
+true abc 0
+
+^abc
+bcdabc
+false 0
+
+// Greedy ? metacharacter
+a?b
+aaaab
+true ab 0
+
+a?b
+b
+true b 0
+
+a?b
+aaaccc
+false 0
+
+.?b
+aaaab
+true ab 0
+
+// Reluctant ? metacharacter
+a??b
+aaaab
+true ab 0
+
+a??b
+b
+true b 0
+
+a??b
+aaaccc
+false 0
+
+.??b
+aaaab
+true ab 0
+
+// Possessive ? metacharacter
+a?+b
+aaaab
+true ab 0
+
+a?+b
+b
+true b 0
+
+a?+b
+aaaccc
+false 0
+
+.?+b
+aaaab
+true ab 0
+
+// Greedy + metacharacter
+a+b
+aaaab
+true aaaab 0
+
+a+b
+b
+false 0
+
+a+b
+aaaccc
+false 0
+
+.+b
+aaaab
+true aaaab 0
+
+// Reluctant + metacharacter
+a+?b
+aaaab
+true aaaab 0
+
+a+?b
+b
+false 0
+
+a+?b
+aaaccc
+false 0
+
+.+?b
+aaaab
+true aaaab 0
+
+// Possessive + metacharacter
+a++b
+aaaab
+true aaaab 0
+
+a++b
+b
+false 0
+
+a++b
+aaaccc
+false 0
+
+.++b
+aaaab
+false 0
+
+// Greedy Repetition
+a{2,3}
+a
+false 0
+
+a{2,3}
+aa
+true aa 0
+
+a{2,3}
+aaa
+true aaa 0
+
+a{2,3}
+aaaa
+true aaa 0
+
+a{3,}
+zzzaaaazzz
+true aaaa 0
+
+a{3,}
+zzzaazzz
+false 0
+
+// Reluctant Repetition
+a{2,3}?
+a
+false 0
+
+a{2,3}?
+aa
+true aa 0
+
+a{2,3}?
+aaa
+true aa 0
+
+a{2,3}?
+aaaa
+true aa 0
+
+// Zero width Positive lookahead
+abc(?=d)
+zzzabcd
+true abc 0
+
+abc(?=d)
+zzzabced
+false 0
+
+// Zero width Negative lookahead
+abc(?!d)
+zzabcd
+false 0
+
+abc(?!d)
+zzabced
+true abc 0
+
+// Zero width Positive lookbehind
+\w(?<=a)
+###abc###
+true a 0
+
+\w(?<=a)
+###ert###
+false 0
+
+// Zero width Negative lookbehind
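+// NOTE(review): the negative-lookbehind cases (and any sections that followed them) appear to have been lost from this copy -- restore them from the original file.
+// Disable metacharacters- test both length <=3 and >3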
+// So that the BM optimization is part of test
+\Q***\Eabc
+***abc
+true ***abc 0
+
+bl\Q***\Eabc
+bl***abc
+true bl***abc 0
+
+\Q***abc
+***abc
+true ***abc 0
+
+blah\Q***\Eabc
+blah***abc
+true blah***abc 0
+
+\Q***abc
+***abc
+true ***abc 0
+
+\Q*ab
+*ab
+true *ab 0
+
+blah\Q***abc
+blah***abc
+true blah***abc 0
+
+bla\Q***abc
+bla***abc
+true bla***abc 0
+
+// Escapes in char classes
+[ab\Qdef\E]
+d
+true d 0
+
+[ab\Q[\E]
+[
+true [ 0
+
+[\Q]\E]
+]
+true ] 0
+
+[\Q\\E]
+\
+true \ 0
+
+[\Q(\E]
+(
+true ( 0
+
+[\n-#]
+!
+true ! 0
+
+[\n-#]
+-
+false 0
+
+[\w-#]
+!
+false 0
+
+[\w-#]
+a
+true a 0
+
+[\w-#]
+-
+true - 0
+
+[\w-#]
+#
+true # 0
+
+[\043]+
+blahblah#blech
+true # 0
+
+[\042-\044]+
+blahblah#blech
+true # 0
+
+[\u1234-\u1236]
+blahblah\u1235blech
+true \u1235 0
+
+[^\043]*
+blahblah#blech
+true blahblah 0
+
+(|f)?+
+foo
+true 1